From 75cf8f8c2a12e2dfdb365d9248a50d137976bb5f Mon Sep 17 00:00:00 2001
From: Lenna Peterson <arklenna@gmail.com>
Date: Mon, 2 Jul 2012 01:01:25 -0400
Subject: [PATCH 001/168] Initial per-sample line filtering.

---
 scripts/vcf_sample_filter.py | 42 ++++++++++++++++++++++++++++++++++++
 vcf/parser.py                | 34 ++++++++++++++++++++++++++++-
 2 files changed, 75 insertions(+), 1 deletion(-)
 create mode 100644 scripts/vcf_sample_filter.py

diff --git a/scripts/vcf_sample_filter.py b/scripts/vcf_sample_filter.py
new file mode 100644
index 0000000..fb987ff
--- /dev/null
+++ b/scripts/vcf_sample_filter.py
@@ -0,0 +1,42 @@
+#!/usr/bin/env python
+import sys
+
+import vcf
+#from parser import Reader, Writer
+
+class SampleFilter(object):
+    def __init__(self, infile, outfile, arg=None):
+        self.parser = Reader(filename=infile)
+        self.samples = self.parser.samples
+        self.outfile = outfile
+        if arg is not None:
+            self.set_filters(arg)
+            self.write()
+        else:
+            print "Samples:"
+            for idx, val in enumerate(self.list_samples()):
+                print "{0}: {1}".format(idx, val)
+
+    def list_samples(self):
+        return self.samples
+
+    def set_filters(self, filters, invert=False):
+        filters = filters.split(",")
+        if invert:
+            #filters = 
+            pass
+
+        self.parser.set_sample_filter(filters)
+
+    def write(self):
+        #writer = Writer(stream=self.outfile, template=self.parser)
+        test_row = self.parser.next()
+        print test_row.samples
+
+if __name__ == "__main__":
+    if len(sys.argv) < 4:
+        print "Usage: script.py infile outfile [filt1,filt2]"
+        if len(sys.argv) < 3:
+            raise SystemExit
+
+    filt = SampleFilter(*sys.argv[1:])
diff --git a/vcf/parser.py b/vcf/parser.py
index adafbd0..11c8454 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -172,7 +172,7 @@ def read_meta(self, meta_string):
 class Reader(object):
     """ Reader for a VCF v 4.0 file, an iterator returning ``_Record objects`` """
 
-    def __init__(self, fsock=None, filename=None, compressed=False, prepend_chr=False):
+    def __init__(self, fsock=None, filename=None, compressed=False, prepend_chr=False, samp_filter=None):
         """ Create a new Reader for a VCF file.
 
             You must specify either fsock (stream) or filename.  Gzipped streams
@@ -215,6 +215,7 @@ def __init__(self, fsock=None, filename=None, compressed=False, prepend_chr=Fals
         self._prepend_chr = prepend_chr
         self._parse_metainfo()
         self._format_cache = {}
+        self.set_sample_filter(samp_filter)
 
     def __iter__(self):
         return self
@@ -319,6 +320,34 @@ def _parse_info(self, info_str):
 
         return retdict
 
+    def set_sample_filter(self, samp_filter):
+        self._samp_filter = None
+        if samp_filter is None:
+            return None
+        if isinstance(samp_filter, basestring):
+            samp_filter = samp_filter.split(",")
+        # if filters aren't ints, try to convert to sample indices
+        try:
+            samp_filter = [int(x) for x in samp_filter]
+        except ValueError:
+            try:
+                samp_filter = [self._sample_indexes[samp] for samp in samp_filter]
+            except KeyError:
+                # TODO raise RuntimeWarning about sample not found
+                pass
+        self._samp_filter = samp_filter
+
+    def _filter_samples(self, samples):
+        if self._samp_filter is None:
+            return samples
+        filt = self._samp_filter
+        self.samples = [val for idx,val in enumerate(self.samples) if idx not in filt]
+        samples = [val for idx,val in enumerate(samples) if idx not in filt]
+        # FIXME this loop doesn't alter the originals
+        #for samplist in (self.samples, samples):
+            #samplist = [val for idx,val in enumerate(samplist) if idx not in filt]
+        return samples
+
     def _parse_sample_format(self, samp_fmt):
         """ Parse the format of the calls in this _Record """
         samp_fmt = make_calldata_tuple(samp_fmt.split(':'))
@@ -351,6 +380,9 @@ def _parse_samples(self, samples, samp_fmt, site):
 
         samp_fmt = self._format_cache[samp_fmt]
 
+        # filter samples
+        samples = self._filter_samples(samples)
+
         if cparse:
             return cparse.parse_samples(
                 self.samples, samples, samp_fmt, samp_fmt._types, samp_fmt._nums, site)

From 18deb2a523eaa127b8a04e50639205e840a205c8 Mon Sep 17 00:00:00 2001
From: Lenna Peterson <arklenna@gmail.com>
Date: Mon, 2 Jul 2012 13:56:25 -0400
Subject: [PATCH 002/168] Improved samp filter performance, allow invert.

---
 scripts/vcf_sample_filter.py | 39 ++++++++++++++++++++++++++++--------
 1 file changed, 31 insertions(+), 8 deletions(-)

diff --git a/scripts/vcf_sample_filter.py b/scripts/vcf_sample_filter.py
index fb987ff..a3b2fb6 100644
--- a/scripts/vcf_sample_filter.py
+++ b/scripts/vcf_sample_filter.py
@@ -1,16 +1,18 @@
 #!/usr/bin/env python
 import sys
+import warnings
 
-import vcf
+from vcf import Reader, Writer
 #from parser import Reader, Writer
 
 class SampleFilter(object):
-    def __init__(self, infile, outfile, arg=None):
+    def __init__(self, infile, outfile, filters=None, **kwarg):
         self.parser = Reader(filename=infile)
         self.samples = self.parser.samples
+        self.smp_idx = dict([(v,k) for k,v in enumerate(self.samples)])
         self.outfile = outfile
-        if arg is not None:
-            self.set_filters(arg)
+        if filters is not None:
+            self.set_filters(filters, **kwarg)
             self.write()
         else:
             print "Samples:"
@@ -20,11 +22,29 @@ def __init__(self, infile, outfile, arg=None):
     def list_samples(self):
         return self.samples
 
-    def set_filters(self, filters, invert=False):
-        filters = filters.split(",")
+    def set_filters(self, filters, invert=False, **kwarg):
+        filt_l = filters.split(",")
+        filt_s = set(filt_l)
+        if len(filt_s) < len(filt_l):
+            warnings.warn("Non-unique filters, ignoring", RuntimeWarning)
+        def filt2idx(item):
+            """Convert filter to valid sample index"""
+            try:
+                item = int(item)
+            except ValueError:
+                # not an idx, check if it's a value
+                return self.smp_idx.get(item)
+            else:
+                # is int, check if it's an idx
+                if item < len(self.samples):
+                    return item
+        filters = set(filter(lambda x: x is not None, map(filt2idx, filt_s)))
+        if len(filters) < len(filt_s):
+            # TODO print the filters that were ignored
+            warnings.warn("Invalid filters, ignoring", RuntimeWarning)
+
         if invert:
-            #filters = 
-            pass
+            filters = set(xrange(len(self.samples))).difference(filters)
 
         self.parser.set_sample_filter(filters)
 
@@ -34,9 +54,12 @@ def write(self):
         print test_row.samples
 
 if __name__ == "__main__":
+    # TODO implement argparse
     if len(sys.argv) < 4:
         print "Usage: script.py infile outfile [filt1,filt2]"
         if len(sys.argv) < 3:
             raise SystemExit
 
     filt = SampleFilter(*sys.argv[1:])
+    print "now invert:"
+    filt2 = SampleFilter(*sys.argv[1:], invert=True)

From 8477e6f0ba788f02b8673e1d852a7c1c52f3e837 Mon Sep 17 00:00:00 2001
From: Lenna Peterson <arklenna@gmail.com>
Date: Mon, 2 Jul 2012 16:28:52 -0400
Subject: [PATCH 003/168] Args can be provided all at once or in sequence.

The latter style (filt3) allows semi-interactive use at the Python prompt.
---
 scripts/vcf_sample_filter.py | 28 +++++++++++++++++++++-------
 1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/scripts/vcf_sample_filter.py b/scripts/vcf_sample_filter.py
index a3b2fb6..821b152 100644
--- a/scripts/vcf_sample_filter.py
+++ b/scripts/vcf_sample_filter.py
@@ -6,14 +6,17 @@
 #from parser import Reader, Writer
 
 class SampleFilter(object):
-    def __init__(self, infile, outfile, filters=None, **kwarg):
+    def __init__(self, infile, outfile=None, filters=None, invert=False):
         self.parser = Reader(filename=infile)
         self.samples = self.parser.samples
         self.smp_idx = dict([(v,k) for k,v in enumerate(self.samples)])
         self.outfile = outfile
+        self.invert = invert
+        self.filters = filters
         if filters is not None:
-            self.set_filters(filters, **kwarg)
-            self.write()
+            self.set_filters()
+            if outfile is not None:
+                self.write()
         else:
             print "Samples:"
             for idx, val in enumerate(self.list_samples()):
@@ -22,8 +25,12 @@ def __init__(self, infile, outfile, filters=None, **kwarg):
     def list_samples(self):
         return self.samples
 
-    def set_filters(self, filters, invert=False, **kwarg):
-        filt_l = filters.split(",")
+    def set_filters(self, filters=None, invert=False):
+        if filters is not None:
+            self.filters = filters
+        if invert:
+            self.invert = invert
+        filt_l = self.filters.split(",")
         filt_s = set(filt_l)
         if len(filt_s) < len(filt_l):
             warnings.warn("Non-unique filters, ignoring", RuntimeWarning)
@@ -43,12 +50,15 @@ def filt2idx(item):
             # TODO print the filters that were ignored
             warnings.warn("Invalid filters, ignoring", RuntimeWarning)
 
-        if invert:
+        if self.invert:
             filters = set(xrange(len(self.samples))).difference(filters)
 
         self.parser.set_sample_filter(filters)
 
-    def write(self):
+    def write(self, outfile=None):
+        if outfile is not None:
+            self.outfile = outfile
+        print "outfile:", self.outfile
         #writer = Writer(stream=self.outfile, template=self.parser)
         test_row = self.parser.next()
         print test_row.samples
@@ -63,3 +73,7 @@ def write(self):
     filt = SampleFilter(*sys.argv[1:])
     print "now invert:"
     filt2 = SampleFilter(*sys.argv[1:], invert=True)
+    print "now sequential:"
+    filt3 = SampleFilter(sys.argv[1])
+    filt3.set_filters(sys.argv[3])
+    filt3.write(sys.argv[2])

From 73376c8347bfb61c713085822e73bbf9643ac109 Mon Sep 17 00:00:00 2001
From: Lenna Peterson <arklenna@gmail.com>
Date: Mon, 2 Jul 2012 17:44:16 -0400
Subject: [PATCH 004/168] Reduced amount of sample filter code in parser.

---
 scripts/vcf_sample_filter.py |  2 +-
 vcf/parser.py                | 29 +++++------------------------
 2 files changed, 6 insertions(+), 25 deletions(-)

diff --git a/scripts/vcf_sample_filter.py b/scripts/vcf_sample_filter.py
index 821b152..c8aa723 100644
--- a/scripts/vcf_sample_filter.py
+++ b/scripts/vcf_sample_filter.py
@@ -53,7 +53,7 @@ def filt2idx(item):
         if self.invert:
             filters = set(xrange(len(self.samples))).difference(filters)
 
-        self.parser.set_sample_filter(filters)
+        self.parser._set_sample_filter(filters)
 
     def write(self, outfile=None):
         if outfile is not None:
diff --git a/vcf/parser.py b/vcf/parser.py
index 11c8454..7c6a8f7 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -172,7 +172,7 @@ def read_meta(self, meta_string):
 class Reader(object):
     """ Reader for a VCF v 4.0 file, an iterator returning ``_Record objects`` """
 
-    def __init__(self, fsock=None, filename=None, compressed=False, prepend_chr=False, samp_filter=None):
+    def __init__(self, fsock=None, filename=None, compressed=False, prepend_chr=False):
         """ Create a new Reader for a VCF file.
 
             You must specify either fsock (stream) or filename.  Gzipped streams
@@ -210,12 +210,12 @@ def __init__(self, fsock=None, filename=None, compressed=False, prepend_chr=Fals
         self.formats = None
         self.samples = None
         self._sample_indexes = None
+        self._samp_filter = None
         self._header_lines = []
         self._tabix = None
         self._prepend_chr = prepend_chr
         self._parse_metainfo()
         self._format_cache = {}
-        self.set_sample_filter(samp_filter)
 
     def __iter__(self):
         return self
@@ -320,32 +320,13 @@ def _parse_info(self, info_str):
 
         return retdict
 
-    def set_sample_filter(self, samp_filter):
-        self._samp_filter = None
-        if samp_filter is None:
-            return None
-        if isinstance(samp_filter, basestring):
-            samp_filter = samp_filter.split(",")
-        # if filters aren't ints, try to convert to sample indices
-        try:
-            samp_filter = [int(x) for x in samp_filter]
-        except ValueError:
-            try:
-                samp_filter = [self._sample_indexes[samp] for samp in samp_filter]
-            except KeyError:
-                # TODO raise RuntimeWarning about sample not found
-                pass
+    def _set_sample_filter(self, samp_filter):
         self._samp_filter = samp_filter
 
     def _filter_samples(self, samples):
-        if self._samp_filter is None:
-            return samples
         filt = self._samp_filter
         self.samples = [val for idx,val in enumerate(self.samples) if idx not in filt]
         samples = [val for idx,val in enumerate(samples) if idx not in filt]
-        # FIXME this loop doesn't alter the originals
-        #for samplist in (self.samples, samples):
-            #samplist = [val for idx,val in enumerate(samplist) if idx not in filt]
         return samples
 
     def _parse_sample_format(self, samp_fmt):
@@ -377,11 +358,11 @@ def _parse_samples(self, samples, samp_fmt, site):
         # check whether we already know how to parse this format
         if samp_fmt not in self._format_cache:
             self._format_cache[samp_fmt] = self._parse_sample_format(samp_fmt)
-
         samp_fmt = self._format_cache[samp_fmt]
 
         # filter samples
-        samples = self._filter_samples(samples)
+        if self._samp_filter is not None:
+            samples = self._filter_samples(samples)
 
         if cparse:
             return cparse.parse_samples(

From 362bbab37cafaa920f6796fcfedf9ba1acf2f721 Mon Sep 17 00:00:00 2001
From: Lenna Peterson <arklenna@gmail.com>
Date: Mon, 2 Jul 2012 22:00:16 -0400
Subject: [PATCH 005/168] Actually write out sample-filtered file.

---
 scripts/vcf_sample_filter.py | 20 ++++++++++----------
 vcf/parser.py                |  8 ++++----
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/scripts/vcf_sample_filter.py b/scripts/vcf_sample_filter.py
index c8aa723..d08e933 100644
--- a/scripts/vcf_sample_filter.py
+++ b/scripts/vcf_sample_filter.py
@@ -58,10 +58,9 @@ def filt2idx(item):
     def write(self, outfile=None):
         if outfile is not None:
             self.outfile = outfile
-        print "outfile:", self.outfile
-        #writer = Writer(stream=self.outfile, template=self.parser)
-        test_row = self.parser.next()
-        print test_row.samples
+        writer = Writer(open(self.outfile, "w"), self.parser)
+        for row in self.parser:
+            writer.write_record(row)
 
 if __name__ == "__main__":
     # TODO implement argparse
@@ -71,9 +70,10 @@ def write(self, outfile=None):
             raise SystemExit
 
     filt = SampleFilter(*sys.argv[1:])
-    print "now invert:"
-    filt2 = SampleFilter(*sys.argv[1:], invert=True)
-    print "now sequential:"
-    filt3 = SampleFilter(sys.argv[1])
-    filt3.set_filters(sys.argv[3])
-    filt3.write(sys.argv[2])
+    #print "now invert:"
+    #filt2 = SampleFilter(*sys.argv[1:], invert=True)
+    #print "now sequential:"
+    #filt3 = SampleFilter(sys.argv[1])
+    #if len(sys.argv) > 3:
+        #filt3.set_filters(sys.argv[3])
+        #filt3.write(sys.argv[2])
diff --git a/vcf/parser.py b/vcf/parser.py
index 7c6a8f7..df44dde 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -324,10 +324,10 @@ def _set_sample_filter(self, samp_filter):
         self._samp_filter = samp_filter
 
     def _filter_samples(self, samples):
-        filt = self._samp_filter
-        self.samples = [val for idx,val in enumerate(self.samples) if idx not in filt]
-        samples = [val for idx,val in enumerate(samples) if idx not in filt]
-        return samples
+        filt = set(self._samp_filter)
+        self.samples = [val for idx,val in enumerate(self.samples)
+                        if idx not in filt]
+        return [val for idx,val in enumerate(samples) if idx not in filt]
 
     def _parse_sample_format(self, samp_fmt):
         """ Parse the format of the calls in this _Record """

From d71b2cd6c184c33931d3b3d9aa464a3bf0176859 Mon Sep 17 00:00:00 2001
From: Lenna Peterson <arklenna@gmail.com>
Date: Mon, 2 Jul 2012 22:09:40 -0400
Subject: [PATCH 006/168] Switched Writer \r\n to os.linesep.

---
 vcf/parser.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/vcf/parser.py b/vcf/parser.py
index df44dde..5026572 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -1,10 +1,11 @@
+import codecs
 import collections
-import re
 import csv
 import gzip
-import sys
 import itertools
-import codecs
+import os
+import re
+import sys
 
 try:
     from collections import OrderedDict
@@ -522,15 +523,15 @@ def fetch(self, chrom, start, end=None):
 
 
 class Writer(object):
-    """ VCF Writer """
+    """VCF Writer. On Windows Python 2, open stream with 'wb'."""
 
     fixed_fields = "#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT".split()
 
     # Reverse keys and values in header field count dictionary
     counts = dict((v,k) for k,v in field_counts.iteritems())
 
-    def __init__(self, stream, template, lineterminator="\r\n"):
-        self.writer = csv.writer(stream, delimiter="\t", lineterminator=lineterminator)
+    def __init__(self, stream, template, eol=os.linesep):
+        self.writer = csv.writer(stream, delimiter="\t", lineterminator=eol)
         self.template = template
         self.stream = stream
 

From bce2c47774d675481f4d50a4200ac4aafb1ed05f Mon Sep 17 00:00:00 2001
From: Lenna Peterson <arklenna@gmail.com>
Date: Tue, 3 Jul 2012 13:59:58 -0400
Subject: [PATCH 007/168] Fixed sample name list update/printing.

---
 scripts/vcf_sample_filter.py |  4 +++-
 vcf/parser.py                | 17 +++++++++++++----
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/scripts/vcf_sample_filter.py b/scripts/vcf_sample_filter.py
index d08e933..5d039f0 100644
--- a/scripts/vcf_sample_filter.py
+++ b/scripts/vcf_sample_filter.py
@@ -53,7 +53,9 @@ def filt2idx(item):
         if self.invert:
             filters = set(xrange(len(self.samples))).difference(filters)
 
-        self.parser._set_sample_filter(filters)
+        # sample_filter is a property that updates parser.samples
+        self.parser.sample_filter = filters
+        print "Keeping these samples:", self.parser.samples
 
     def write(self, outfile=None):
         if outfile is not None:
diff --git a/vcf/parser.py b/vcf/parser.py
index 5026572..279b155 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -211,7 +211,7 @@ def __init__(self, fsock=None, filename=None, compressed=False, prepend_chr=Fals
         self.formats = None
         self.samples = None
         self._sample_indexes = None
-        self._samp_filter = None
+        self.sample_filter = None
         self._header_lines = []
         self._tabix = None
         self._prepend_chr = prepend_chr
@@ -321,13 +321,22 @@ def _parse_info(self, info_str):
 
         return retdict
 
-    def _set_sample_filter(self, samp_filter):
+    @property
+    def sample_filter(self):
+        return self._samp_filter
+
+    @sample_filter.setter
+    def sample_filter(self, samp_filter):
         self._samp_filter = samp_filter
+        # not None or empty list
+        if samp_filter:
+            self.samples = [val for idx,val in enumerate(self.samples)
+                            if idx not in set(samp_filter)]
+            # XXX could update self._sample indexes or use it as history
+
 
     def _filter_samples(self, samples):
         filt = set(self._samp_filter)
-        self.samples = [val for idx,val in enumerate(self.samples)
-                        if idx not in filt]
         return [val for idx,val in enumerate(samples) if idx not in filt]
 
     def _parse_sample_format(self, samp_fmt):

From 67744c0855b3cee1eec21833273784ce7a27d0b2 Mon Sep 17 00:00:00 2001
From: Lenna Peterson <arklenna@gmail.com>
Date: Fri, 6 Jul 2012 18:58:32 -0400
Subject: [PATCH 008/168] Moved all sample filtering to filter script.

---
 scripts/vcf_sample_filter.py | 31 ++++++++++++++++++++++++++-----
 vcf/parser.py                | 23 -----------------------
 2 files changed, 26 insertions(+), 28 deletions(-)

diff --git a/scripts/vcf_sample_filter.py b/scripts/vcf_sample_filter.py
index 5d039f0..56e76ee 100644
--- a/scripts/vcf_sample_filter.py
+++ b/scripts/vcf_sample_filter.py
@@ -7,9 +7,33 @@
 
 class SampleFilter(object):
     def __init__(self, infile, outfile=None, filters=None, invert=False):
+        # Methods to add to Reader
+        def get_filter(self):
+            return self._samp_filter
+
+        def set_filter(self, filt):
+            self._samp_filter = filt
+            if filt:
+                self.samples = [val for idx,val in enumerate(self.samples)
+                               if idx not in set(filt)]
+
+        def filter_samples(fn):
+            """Decorator function to filter sample parameter"""
+            def filt(self, samples, *args):
+                samples = [val for idx,val in enumerate(samples)
+                           if idx not in set(self.sample_filter)]
+                return fn(self, samples, *args)
+            return filt
+
+        # Add property to Reader for filter list
+        Reader.sample_filter = property(get_filter, set_filter)
+        # Modify Reader._parse_samples to filter samples
+        Reader._parse_samples = filter_samples(Reader._parse_samples)
         self.parser = Reader(filename=infile)
+        # Store initial samples and indices
         self.samples = self.parser.samples
         self.smp_idx = dict([(v,k) for k,v in enumerate(self.samples)])
+        # Properties for filter/writer
         self.outfile = outfile
         self.invert = invert
         self.filters = filters
@@ -19,12 +43,9 @@ def __init__(self, infile, outfile=None, filters=None, invert=False):
                 self.write()
         else:
             print "Samples:"
-            for idx, val in enumerate(self.list_samples()):
+            for idx, val in enumerate(self.samples):
                 print "{0}: {1}".format(idx, val)
 
-    def list_samples(self):
-        return self.samples
-
     def set_filters(self, filters=None, invert=False):
         if filters is not None:
             self.filters = filters
@@ -53,7 +74,7 @@ def filt2idx(item):
         if self.invert:
             filters = set(xrange(len(self.samples))).difference(filters)
 
-        # sample_filter is a property that updates parser.samples
+        # `sample_filter` setter updates `samples`
         self.parser.sample_filter = filters
         print "Keeping these samples:", self.parser.samples
 
diff --git a/vcf/parser.py b/vcf/parser.py
index 279b155..21fe696 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -211,7 +211,6 @@ def __init__(self, fsock=None, filename=None, compressed=False, prepend_chr=Fals
         self.formats = None
         self.samples = None
         self._sample_indexes = None
-        self.sample_filter = None
         self._header_lines = []
         self._tabix = None
         self._prepend_chr = prepend_chr
@@ -321,24 +320,6 @@ def _parse_info(self, info_str):
 
         return retdict
 
-    @property
-    def sample_filter(self):
-        return self._samp_filter
-
-    @sample_filter.setter
-    def sample_filter(self, samp_filter):
-        self._samp_filter = samp_filter
-        # not None or empty list
-        if samp_filter:
-            self.samples = [val for idx,val in enumerate(self.samples)
-                            if idx not in set(samp_filter)]
-            # XXX could update self._sample indexes or use it as history
-
-
-    def _filter_samples(self, samples):
-        filt = set(self._samp_filter)
-        return [val for idx,val in enumerate(samples) if idx not in filt]
-
     def _parse_sample_format(self, samp_fmt):
         """ Parse the format of the calls in this _Record """
         samp_fmt = make_calldata_tuple(samp_fmt.split(':'))
@@ -370,10 +351,6 @@ def _parse_samples(self, samples, samp_fmt, site):
             self._format_cache[samp_fmt] = self._parse_sample_format(samp_fmt)
         samp_fmt = self._format_cache[samp_fmt]
 
-        # filter samples
-        if self._samp_filter is not None:
-            samples = self._filter_samples(samples)
-
         if cparse:
             return cparse.parse_samples(
                 self.samples, samples, samp_fmt, samp_fmt._types, samp_fmt._nums, site)

From a048ec0a749eb7c01b28e94c6280f4112852e000 Mon Sep 17 00:00:00 2001
From: Lenna Peterson <arklenna@gmail.com>
Date: Fri, 6 Jul 2012 20:15:00 -0400
Subject: [PATCH 009/168] Implemented argparse.

---
 scripts/vcf_sample_filter.py | 31 ++++++++++++++++++-------------
 1 file changed, 18 insertions(+), 13 deletions(-)

diff --git a/scripts/vcf_sample_filter.py b/scripts/vcf_sample_filter.py
index 56e76ee..d19a626 100644
--- a/scripts/vcf_sample_filter.py
+++ b/scripts/vcf_sample_filter.py
@@ -1,4 +1,5 @@
 #!/usr/bin/env python
+import argparse
 import sys
 import warnings
 
@@ -47,6 +48,7 @@ def filt(self, samples, *args):
                 print "{0}: {1}".format(idx, val)
 
     def set_filters(self, filters=None, invert=False):
+        """Convert filters from string to list of indices, set on Reader"""
         if filters is not None:
             self.filters = filters
         if invert:
@@ -82,21 +84,24 @@ def write(self, outfile=None):
         if outfile is not None:
             self.outfile = outfile
         writer = Writer(open(self.outfile, "w"), self.parser)
+        print "Writing to '{0}'".format(self.outfile)
         for row in self.parser:
             writer.write_record(row)
 
 if __name__ == "__main__":
-    # TODO implement argparse
-    if len(sys.argv) < 4:
-        print "Usage: script.py infile outfile [filt1,filt2]"
-        if len(sys.argv) < 3:
-            raise SystemExit
+    parser = argparse.ArgumentParser()
+    parser.add_argument("file", type=str,
+                       help="VCF file to filter")
+    parser.add_argument("-f", "--filter", type=str,
+                       help="Comma-separated list of sample indices or names to filter")
+    parser.add_argument("--invert", action="store_true",
+                       help="Keep rather than discard the filtered samples")
+    parser.add_argument("-o", "--outfile", type=str,
+                       help="File to write out filtered samples")
+    # TODO implement quiet (silent if both outfile and filter are specified)
+    parser.add_argument("-q", "--quiet", action="store_true",
+                       help="Less output")
 
-    filt = SampleFilter(*sys.argv[1:])
-    #print "now invert:"
-    #filt2 = SampleFilter(*sys.argv[1:], invert=True)
-    #print "now sequential:"
-    #filt3 = SampleFilter(sys.argv[1])
-    #if len(sys.argv) > 3:
-        #filt3.set_filters(sys.argv[3])
-        #filt3.write(sys.argv[2])
+    args = parser.parse_args()
+
+    SampleFilter(args.file, args.outfile, args.filter, args.invert)

From 19ce645f7a5783cb0ff43f21c7a7815d719776ac Mon Sep 17 00:00:00 2001
From: Lenna Peterson <arklenna@gmail.com>
Date: Fri, 6 Jul 2012 23:24:08 -0400
Subject: [PATCH 010/168] Tweak args, pep8, move empty outfile warning.

---
 scripts/vcf_sample_filter.py | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/scripts/vcf_sample_filter.py b/scripts/vcf_sample_filter.py
index d19a626..5124643 100644
--- a/scripts/vcf_sample_filter.py
+++ b/scripts/vcf_sample_filter.py
@@ -4,7 +4,7 @@
 import warnings
 
 from vcf import Reader, Writer
-#from parser import Reader, Writer
+
 
 class SampleFilter(object):
     def __init__(self, infile, outfile=None, filters=None, invert=False):
@@ -15,13 +15,13 @@ def get_filter(self):
         def set_filter(self, filt):
             self._samp_filter = filt
             if filt:
-                self.samples = [val for idx,val in enumerate(self.samples)
+                self.samples = [val for idx, val in enumerate(self.samples)
                                if idx not in set(filt)]
 
         def filter_samples(fn):
             """Decorator function to filter sample parameter"""
             def filt(self, samples, *args):
-                samples = [val for idx,val in enumerate(samples)
+                samples = [val for idx, val in enumerate(samples)
                            if idx not in set(self.sample_filter)]
                 return fn(self, samples, *args)
             return filt
@@ -33,15 +33,14 @@ def filt(self, samples, *args):
         self.parser = Reader(filename=infile)
         # Store initial samples and indices
         self.samples = self.parser.samples
-        self.smp_idx = dict([(v,k) for k,v in enumerate(self.samples)])
+        self.smp_idx = dict([(v, k) for k, v in enumerate(self.samples)])
         # Properties for filter/writer
         self.outfile = outfile
         self.invert = invert
         self.filters = filters
         if filters is not None:
             self.set_filters()
-            if outfile is not None:
-                self.write()
+            self.write()
         else:
             print "Samples:"
             for idx, val in enumerate(self.samples):
@@ -57,6 +56,7 @@ def set_filters(self, filters=None, invert=False):
         filt_s = set(filt_l)
         if len(filt_s) < len(filt_l):
             warnings.warn("Non-unique filters, ignoring", RuntimeWarning)
+
         def filt2idx(item):
             """Convert filter to valid sample index"""
             try:
@@ -83,6 +83,8 @@ def filt2idx(item):
     def write(self, outfile=None):
         if outfile is not None:
             self.outfile = outfile
+        if self.outfile is None:
+            raise IOError("write() called with no outfile")
         writer = Writer(open(self.outfile, "w"), self.parser)
         print "Writing to '{0}'".format(self.outfile)
         for row in self.parser:
@@ -92,11 +94,12 @@ def write(self, outfile=None):
     parser = argparse.ArgumentParser()
     parser.add_argument("file", type=str,
                        help="VCF file to filter")
-    parser.add_argument("-f", "--filter", type=str,
-                       help="Comma-separated list of sample indices or names to filter")
+    parser.add_argument("-f", metavar="filters", type=str,
+                       help="Comma-separated list of sample indices or names \
+                        to filter")
     parser.add_argument("--invert", action="store_true",
                        help="Keep rather than discard the filtered samples")
-    parser.add_argument("-o", "--outfile", type=str,
+    parser.add_argument("-o", metavar="outfile", type=str,
                        help="File to write out filtered samples")
     # TODO implement quiet (silent if both outfile and filter are specified)
     parser.add_argument("-q", "--quiet", action="store_true",

From 95fc70b0bac79aca2ace82221ef78c0e66bd592f Mon Sep 17 00:00:00 2001
From: Lenna Peterson <arklenna@gmail.com>
Date: Sat, 7 Jul 2012 01:45:34 -0400
Subject: [PATCH 011/168] Fixed argparse arg names.

---
 scripts/vcf_sample_filter.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/scripts/vcf_sample_filter.py b/scripts/vcf_sample_filter.py
index 5124643..228d450 100644
--- a/scripts/vcf_sample_filter.py
+++ b/scripts/vcf_sample_filter.py
@@ -107,4 +107,5 @@ def write(self, outfile=None):
 
     args = parser.parse_args()
 
-    SampleFilter(args.file, args.outfile, args.filter, args.invert)
+    SampleFilter(infile=args.file, outfile=args.o,
+                 filters=args.f, invert=args.invert)

From 67afb27fe7711cee0e41cac934fc2933cf195235 Mon Sep 17 00:00:00 2001
From: Lenna Peterson <arklenna@gmail.com>
Date: Sat, 7 Jul 2012 02:02:18 -0400
Subject: [PATCH 012/168] Changed default out to sys.stdout

---
 scripts/vcf_sample_filter.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/scripts/vcf_sample_filter.py b/scripts/vcf_sample_filter.py
index 228d450..deb20e6 100644
--- a/scripts/vcf_sample_filter.py
+++ b/scripts/vcf_sample_filter.py
@@ -78,28 +78,29 @@ def filt2idx(item):
 
         # `sample_filter` setter updates `samples`
         self.parser.sample_filter = filters
-        print "Keeping these samples:", self.parser.samples
+        sys.stderr.write("Keeping these samples: {0}\n".format(self.parser.samples))
 
     def write(self, outfile=None):
         if outfile is not None:
             self.outfile = outfile
         if self.outfile is None:
-            raise IOError("write() called with no outfile")
-        writer = Writer(open(self.outfile, "w"), self.parser)
-        print "Writing to '{0}'".format(self.outfile)
+            _out = sys.stdout
+        else:
+            _out = open(self.outfile, "wb")
+        writer = Writer(_out, self.parser)
+        sys.stderr.write("Writing to '{0}'\n".format(self.outfile))
         for row in self.parser:
             writer.write_record(row)
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    parser.add_argument("file", type=str,
-                       help="VCF file to filter")
-    parser.add_argument("-f", metavar="filters", type=str,
+    parser.add_argument("file", help="VCF file to filter")
+    parser.add_argument("-f", metavar="filters",
                        help="Comma-separated list of sample indices or names \
                         to filter")
     parser.add_argument("--invert", action="store_true",
                        help="Keep rather than discard the filtered samples")
-    parser.add_argument("-o", metavar="outfile", type=str,
+    parser.add_argument("-o", metavar="outfile",
                        help="File to write out filtered samples")
     # TODO implement quiet (silent if both outfile and filter are specified)
     parser.add_argument("-q", "--quiet", action="store_true",

From 33d2b5cc17c73185e666ff8ccbb7fbcc13a2d247 Mon Sep 17 00:00:00 2001
From: Lenna Peterson <arklenna@gmail.com>
Date: Sat, 7 Jul 2012 02:54:48 -0400
Subject: [PATCH 013/168] Added unit test for sample filtering script.

---
 vcf/test/test_vcf.py | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index 809c237..bfa5cd7 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -3,6 +3,7 @@
 import doctest
 import os
 import commands
+import subprocess
 from StringIO import StringIO
 
 import vcf
@@ -633,6 +634,27 @@ def testOpenFilenameGzipped(self):
         self.assertEqual(self.samples, r.samples)
 
 
+class TestSampleFilter(unittest.TestCase):
+    def testListSamples(self):
+        s, out = commands.getstatusoutput('python scripts/vcf_sample_filter.py vcf/test/example-4.1.vcf')
+        self.assertEqual(s, 0)
+        expected_out = """Samples:
+0: NA00001
+1: NA00002
+2: NA00003"""
+        self.assertEqual(out, expected_out)
+
+    def testWithFilter(self):
+        out = subprocess.Popen('python scripts/vcf_sample_filter.py vcf/test/example-4.1.vcf -f 1,2', shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()[0]
+        buf = StringIO()
+        buf.write(out)
+        buf.seek(0)
+        #print(buf.getvalue())
+        reader = vcf.Reader(buf)
+        self.assertEqual(reader.samples, ['NA00001'])
+        #print(reader.next())
+
+
 class TestFilter(unittest.TestCase):
 
 
@@ -760,6 +782,7 @@ def test_trim(self):
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestWriter))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestTabix))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestOpenMethods))
+suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestSampleFilter))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestFilter))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(Test1kg))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestRecord))

From 792d685b90e3b29e55daf9457ce0685b6e26da98 Mon Sep 17 00:00:00 2001
From: Lenna Peterson <arklenna@gmail.com>
Date: Sat, 7 Jul 2012 02:59:48 -0400
Subject: [PATCH 014/168] Added authorship statement.

---
 scripts/vcf_sample_filter.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/scripts/vcf_sample_filter.py b/scripts/vcf_sample_filter.py
index deb20e6..5314d2e 100644
--- a/scripts/vcf_sample_filter.py
+++ b/scripts/vcf_sample_filter.py
@@ -1,4 +1,9 @@
 #!/usr/bin/env python
+
+# Author: Lenna X. Peterson
+# github.com/lennax
+# arklenna at gmail dot com
+
 import argparse
 import sys
 import warnings

From d78a94594597c04c300839a68ea2fc5097e9a13b Mon Sep 17 00:00:00 2001
From: Lenna Peterson <arklenna@gmail.com>
Date: Sat, 7 Jul 2012 03:02:24 -0400
Subject: [PATCH 015/168] Added sample filter to list of scripts in setup.

---
 setup.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index bca3a0d..4ecec0a 100644
--- a/setup.py
+++ b/setup.py
@@ -44,7 +44,8 @@
 setup(
     name='PyVCF',
     packages=['vcf', 'vcf.test'],
-    scripts=['scripts/vcf_melt', 'scripts/vcf_filter.py'],
+    scripts=['scripts/vcf_melt', 'scripts/vcf_filter.py',
+             'scripts/vcf_sample_filter.py'],
     author='James Casbon and @jdoughertyii',
     author_email='casbon@gmail.com',
     description='Variant Call Format (VCF) parser for Python',

From 75c4775e7b4e7737bbd53afbe93c7797dd4f1428 Mon Sep 17 00:00:00 2001
From: Lenna Peterson <arklenna@gmail.com>
Date: Mon, 9 Jul 2012 13:34:56 -0400
Subject: [PATCH 016/168] Moved sample filter object to src dir.

---
 scripts/vcf_sample_filter.py | 94 ++--------------------------------
 vcf/__init__.py              |  3 +-
 vcf/sample_filter.py         | 98 ++++++++++++++++++++++++++++++++++++
 3 files changed, 103 insertions(+), 92 deletions(-)
 create mode 100644 vcf/sample_filter.py

diff --git a/scripts/vcf_sample_filter.py b/scripts/vcf_sample_filter.py
index 5314d2e..6ff2bd3 100644
--- a/scripts/vcf_sample_filter.py
+++ b/scripts/vcf_sample_filter.py
@@ -5,98 +5,10 @@
 # arklenna at gmail dot com
 
 import argparse
-import sys
-import warnings
 
-from vcf import Reader, Writer
+from vcf import SampleFilter
 
 
-class SampleFilter(object):
-    def __init__(self, infile, outfile=None, filters=None, invert=False):
-        # Methods to add to Reader
-        def get_filter(self):
-            return self._samp_filter
-
-        def set_filter(self, filt):
-            self._samp_filter = filt
-            if filt:
-                self.samples = [val for idx, val in enumerate(self.samples)
-                               if idx not in set(filt)]
-
-        def filter_samples(fn):
-            """Decorator function to filter sample parameter"""
-            def filt(self, samples, *args):
-                samples = [val for idx, val in enumerate(samples)
-                           if idx not in set(self.sample_filter)]
-                return fn(self, samples, *args)
-            return filt
-
-        # Add property to Reader for filter list
-        Reader.sample_filter = property(get_filter, set_filter)
-        # Modify Reader._parse_samples to filter samples
-        Reader._parse_samples = filter_samples(Reader._parse_samples)
-        self.parser = Reader(filename=infile)
-        # Store initial samples and indices
-        self.samples = self.parser.samples
-        self.smp_idx = dict([(v, k) for k, v in enumerate(self.samples)])
-        # Properties for filter/writer
-        self.outfile = outfile
-        self.invert = invert
-        self.filters = filters
-        if filters is not None:
-            self.set_filters()
-            self.write()
-        else:
-            print "Samples:"
-            for idx, val in enumerate(self.samples):
-                print "{0}: {1}".format(idx, val)
-
-    def set_filters(self, filters=None, invert=False):
-        """Convert filters from string to list of indices, set on Reader"""
-        if filters is not None:
-            self.filters = filters
-        if invert:
-            self.invert = invert
-        filt_l = self.filters.split(",")
-        filt_s = set(filt_l)
-        if len(filt_s) < len(filt_l):
-            warnings.warn("Non-unique filters, ignoring", RuntimeWarning)
-
-        def filt2idx(item):
-            """Convert filter to valid sample index"""
-            try:
-                item = int(item)
-            except ValueError:
-                # not an idx, check if it's a value
-                return self.smp_idx.get(item)
-            else:
-                # is int, check if it's an idx
-                if item < len(self.samples):
-                    return item
-        filters = set(filter(lambda x: x is not None, map(filt2idx, filt_s)))
-        if len(filters) < len(filt_s):
-            # TODO print the filters that were ignored
-            warnings.warn("Invalid filters, ignoring", RuntimeWarning)
-
-        if self.invert:
-            filters = set(xrange(len(self.samples))).difference(filters)
-
-        # `sample_filter` setter updates `samples`
-        self.parser.sample_filter = filters
-        sys.stderr.write("Keeping these samples: {0}\n".format(self.parser.samples))
-
-    def write(self, outfile=None):
-        if outfile is not None:
-            self.outfile = outfile
-        if self.outfile is None:
-            _out = sys.stdout
-        else:
-            _out = open(self.outfile, "wb")
-        writer = Writer(_out, self.parser)
-        sys.stderr.write("Writing to '{0}'\n".format(self.outfile))
-        for row in self.parser:
-            writer.write_record(row)
-
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("file", help="VCF file to filter")
@@ -108,8 +20,8 @@ def write(self, outfile=None):
     parser.add_argument("-o", metavar="outfile",
                        help="File to write out filtered samples")
     # TODO implement quiet (silent if both outfile and filter are specified)
-    parser.add_argument("-q", "--quiet", action="store_true",
-                       help="Less output")
+    #parser.add_argument("-q", "--quiet", action="store_true",
+                       #help="Less output")
 
     args = parser.parse_args()
 
diff --git a/vcf/__init__.py b/vcf/__init__.py
index 2935c73..586820e 100644
--- a/vcf/__init__.py
+++ b/vcf/__init__.py
@@ -59,7 +59,7 @@
     >>> print record.INFO['AF']
     [0.5]
 
-There are a number of convienience methods and properties for each ``Record`` allowing you to
+There are a number of convenience methods and properties for each ``Record`` allowing you to
 examine properties of interest::
 
     >>> print record.num_called, record.call_rate, record.num_unknown
@@ -176,5 +176,6 @@
 from vcf.parser import VCFReader, VCFWriter
 from vcf.filters import Base as Filter
 from vcf.parser import RESERVED_INFO, RESERVED_FORMAT
+from vcf.sample_filter import SampleFilter
 
 VERSION = '0.5.0'
diff --git a/vcf/sample_filter.py b/vcf/sample_filter.py
new file mode 100644
index 0000000..38acaec
--- /dev/null
+++ b/vcf/sample_filter.py
@@ -0,0 +1,98 @@
+#!/usr/bin/env python
+
+# Author: Lenna X. Peterson
+# github.com/lennax
+# arklenna at gmail dot com
+
+import sys
+import warnings
+
+
+from parser import Reader, Writer
+
+
+class SampleFilter(object):
+    def __init__(self, infile, outfile=None, filters=None, invert=False):
+        # Methods to add to Reader
+        def get_filter(self):
+            return self._samp_filter
+
+        def set_filter(self, filt):
+            self._samp_filter = filt
+            if filt:
+                self.samples = [val for idx, val in enumerate(self.samples)
+                               if idx not in set(filt)]
+
+        def filter_samples(fn):
+            """Decorator function to filter sample parameter"""
+            def filt(self, samples, *args):
+                samples = [val for idx, val in enumerate(samples)
+                           if idx not in set(self.sample_filter)]
+                return fn(self, samples, *args)
+            return filt
+
+        # Add property to Reader for filter list
+        Reader.sample_filter = property(get_filter, set_filter)
+        # Modify Reader._parse_samples to filter samples
+        Reader._parse_samples = filter_samples(Reader._parse_samples)
+        self.parser = Reader(filename=infile)
+        # Store initial samples and indices
+        self.samples = self.parser.samples
+        self.smp_idx = dict([(v, k) for k, v in enumerate(self.samples)])
+        # Properties for filter/writer
+        self.outfile = outfile
+        self.invert = invert
+        self.filters = filters
+        if filters is not None:
+            self.set_filters()
+            self.write()
+        else:
+            print "Samples:"
+            for idx, val in enumerate(self.samples):
+                print "{0}: {1}".format(idx, val)
+
+    def set_filters(self, filters=None, invert=False):
+        """Convert filters from string to list of indices, set on Reader"""
+        if filters is not None:
+            self.filters = filters
+        if invert:
+            self.invert = invert
+        filt_l = self.filters.split(",")
+        filt_s = set(filt_l)
+        if len(filt_s) < len(filt_l):
+            warnings.warn("Non-unique filters, ignoring", RuntimeWarning)
+
+        def filt2idx(item):
+            """Convert filter to valid sample index"""
+            try:
+                item = int(item)
+            except ValueError:
+                # not an idx, check if it's a value
+                return self.smp_idx.get(item)
+            else:
+                # is int, check if it's an idx
+                if item < len(self.samples):
+                    return item
+        filters = set(filter(lambda x: x is not None, map(filt2idx, filt_s)))
+        if len(filters) < len(filt_s):
+            # TODO print the filters that were ignored
+            warnings.warn("Invalid filters, ignoring", RuntimeWarning)
+
+        if self.invert:
+            filters = set(xrange(len(self.samples))).difference(filters)
+
+        # `sample_filter` setter updates `samples`
+        self.parser.sample_filter = filters
+        sys.stderr.write("Keeping these samples: {0}\n".format(self.parser.samples))
+
+    def write(self, outfile=None):
+        if outfile is not None:
+            self.outfile = outfile
+        if self.outfile is None:
+            _out = sys.stdout
+        else:
+            _out = open(self.outfile, "wb")
+        sys.stderr.write("Writing to '{0}'\n".format(self.outfile))
+        writer = Writer(_out, self.parser)
+        for row in self.parser:
+            writer.write_record(row)

From 0047032811b732668a45b6f63599b229c5cda20c Mon Sep 17 00:00:00 2001
From: Lenna Peterson <arklenna@gmail.com>
Date: Mon, 9 Jul 2012 14:43:21 -0400
Subject: [PATCH 017/168] Using logging for easy quiet mode.

---
 scripts/vcf_sample_filter.py | 26 ++++++++++++++++++--------
 vcf/sample_filter.py         | 10 ++++------
 2 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/scripts/vcf_sample_filter.py b/scripts/vcf_sample_filter.py
index 6ff2bd3..d71e6a3 100644
--- a/scripts/vcf_sample_filter.py
+++ b/scripts/vcf_sample_filter.py
@@ -5,6 +5,7 @@
 # arklenna at gmail dot com
 
 import argparse
+import logging
 
 from vcf import SampleFilter
 
@@ -12,18 +13,27 @@
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("file", help="VCF file to filter")
+    parser.add_argument("-o", metavar="outfile",
+                       help="File to write out filtered samples")
     parser.add_argument("-f", metavar="filters",
                        help="Comma-separated list of sample indices or names \
                         to filter")
-    parser.add_argument("--invert", action="store_true",
+    parser.add_argument("-i", "--invert", action="store_true",
                        help="Keep rather than discard the filtered samples")
-    parser.add_argument("-o", metavar="outfile",
-                       help="File to write out filtered samples")
-    # TODO implement quiet (silent if both outfile and filter are specified)
-    #parser.add_argument("-q", "--quiet", action="store_true",
-                       #help="Less output")
+    parser.add_argument("-q", "--quiet", action="store_true",
+                       help="Less output")
 
     args = parser.parse_args()
 
-    SampleFilter(infile=args.file, outfile=args.o,
-                 filters=args.f, invert=args.invert)
+    if args.quiet:
+        log_level = logging.WARNING
+    else:
+        log_level = logging.INFO
+    logging.basicConfig(format='%(message)s', level=log_level)
+
+    sf = SampleFilter(infile=args.file, outfile=args.o,
+                      filters=args.f, invert=args.invert)
+    if args.f is None:
+        print "Samples:"
+        for idx, val in enumerate(sf.samples):
+            print "{0}: {1}".format(idx, val)
diff --git a/vcf/sample_filter.py b/vcf/sample_filter.py
index 38acaec..c9d4f31 100644
--- a/vcf/sample_filter.py
+++ b/vcf/sample_filter.py
@@ -4,6 +4,7 @@
 # github.com/lennax
 # arklenna at gmail dot com
 
+import logging
 import sys
 import warnings
 
@@ -46,10 +47,6 @@ def filt(self, samples, *args):
         if filters is not None:
             self.set_filters()
             self.write()
-        else:
-            print "Samples:"
-            for idx, val in enumerate(self.samples):
-                print "{0}: {1}".format(idx, val)
 
     def set_filters(self, filters=None, invert=False):
         """Convert filters from string to list of indices, set on Reader"""
@@ -83,7 +80,8 @@ def filt2idx(item):
 
         # `sample_filter` setter updates `samples`
         self.parser.sample_filter = filters
-        sys.stderr.write("Keeping these samples: {0}\n".format(self.parser.samples))
+        logging.info("Keeping these samples: {0}\n".format(self.parser.samples))
+        return self.parser.samples
 
     def write(self, outfile=None):
         if outfile is not None:
@@ -92,7 +90,7 @@ def write(self, outfile=None):
             _out = sys.stdout
         else:
             _out = open(self.outfile, "wb")
-        sys.stderr.write("Writing to '{0}'\n".format(self.outfile))
+        logging.info("Writing to '{0}'\n".format(self.outfile))
         writer = Writer(_out, self.parser)
         for row in self.parser:
             writer.write_record(row)

From 6b1fa897a7ee2339e40d2f9aae4d1fbad946d4df Mon Sep 17 00:00:00 2001
From: Lenna Peterson <arklenna@gmail.com>
Date: Mon, 9 Jul 2012 15:39:32 -0400
Subject: [PATCH 018/168] Unit test for sample filter module.

---
 vcf/sample_filter.py |  7 +++++++
 vcf/test/test_vcf.py | 28 +++++++++++++++++++++++++---
 2 files changed, 32 insertions(+), 3 deletions(-)

diff --git a/vcf/sample_filter.py b/vcf/sample_filter.py
index c9d4f31..b963948 100644
--- a/vcf/sample_filter.py
+++ b/vcf/sample_filter.py
@@ -35,6 +35,7 @@ def filt(self, samples, *args):
         # Add property to Reader for filter list
         Reader.sample_filter = property(get_filter, set_filter)
         # Modify Reader._parse_samples to filter samples
+        self._orig_parse_samples = Reader._parse_samples
         Reader._parse_samples = filter_samples(Reader._parse_samples)
         self.parser = Reader(filename=infile)
         # Store initial samples and indices
@@ -88,9 +89,15 @@ def write(self, outfile=None):
             self.outfile = outfile
         if self.outfile is None:
             _out = sys.stdout
+        elif hasattr(self.outfile, 'write'):
+            _out = self.outfile
         else:
             _out = open(self.outfile, "wb")
         logging.info("Writing to '{0}'\n".format(self.outfile))
         writer = Writer(_out, self.parser)
         for row in self.parser:
             writer.write_record(row)
+
+    def undo_monkey_patch(self):
+        delattr(Reader, 'sample_filter')
+        Reader._parse_samples = self._orig_parse_samples
diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index bfa5cd7..36662b0 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -635,7 +635,7 @@ def testOpenFilenameGzipped(self):
 
 
 class TestSampleFilter(unittest.TestCase):
-    def testListSamples(self):
+    def testCLIListSamples(self):
         s, out = commands.getstatusoutput('python scripts/vcf_sample_filter.py vcf/test/example-4.1.vcf')
         self.assertEqual(s, 0)
         expected_out = """Samples:
@@ -644,7 +644,7 @@ def testListSamples(self):
 2: NA00003"""
         self.assertEqual(out, expected_out)
 
-    def testWithFilter(self):
+    def testCLIWithFilter(self):
         out = subprocess.Popen('python scripts/vcf_sample_filter.py vcf/test/example-4.1.vcf -f 1,2', shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()[0]
         buf = StringIO()
         buf.write(out)
@@ -652,7 +652,29 @@ def testWithFilter(self):
         #print(buf.getvalue())
         reader = vcf.Reader(buf)
         self.assertEqual(reader.samples, ['NA00001'])
-        #print(reader.next())
+        rec = reader.next()
+        self.assertEqual(len(rec.samples), 1)
+
+    def testSampleFilterModule(self):
+        # init filter with filename, get list of samples
+        filt = vcf.SampleFilter('vcf/test/example-4.1.vcf')
+        self.assertEqual(filt.samples, ['NA00001', 'NA00002', 'NA00003'])
+        # set filter, check which samples will be kept
+        filtered = filt.set_filters(filters="0", invert=True)
+        self.assertEqual(filtered, ['NA00001'])
+        # write filtered file to StringIO
+        buf = StringIO()
+        filt.write(buf)
+        buf.seek(0)
+        #print(buf.getvalue())
+        # undo monkey patch
+        filt.undo_monkey_patch()
+        # read output
+        reader = vcf.Reader(buf)
+        self.assertEqual(reader.samples, ['NA00001'])
+        print(dir(reader))
+        rec = reader.next()
+        self.assertEqual(len(rec.samples), 1)
 
 
 class TestFilter(unittest.TestCase):

From 817f5e9fd140ac35c99f57ee045446861889912c Mon Sep 17 00:00:00 2001
From: Lenna Peterson <arklenna@gmail.com>
Date: Mon, 9 Jul 2012 15:58:32 -0400
Subject: [PATCH 019/168] Docs/test for undo_monkey_patch

---
 vcf/sample_filter.py | 11 ++++++++---
 vcf/test/test_vcf.py |  2 +-
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/vcf/sample_filter.py b/vcf/sample_filter.py
index b963948..6e5e66e 100644
--- a/vcf/sample_filter.py
+++ b/vcf/sample_filter.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python
-
 # Author: Lenna X. Peterson
 # github.com/lennax
 # arklenna at gmail dot com
@@ -13,6 +11,13 @@
 
 
 class SampleFilter(object):
+    """
+    Modifies the vcf Reader to filter each row by sample as it is parsed.
+    When using the class, be sure to call `undo_monkey_patch()` to restore
+    the original functionality to the Reader.
+
+    """
+
     def __init__(self, infile, outfile=None, filters=None, invert=False):
         # Methods to add to Reader
         def get_filter(self):
@@ -99,5 +104,5 @@ def write(self, outfile=None):
             writer.write_record(row)
 
     def undo_monkey_patch(self):
-        delattr(Reader, 'sample_filter')
         Reader._parse_samples = self._orig_parse_samples
+        delattr(Reader, 'sample_filter')
diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index 36662b0..ab07f2e 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -669,10 +669,10 @@ def testSampleFilterModule(self):
         #print(buf.getvalue())
         # undo monkey patch
         filt.undo_monkey_patch()
+        self.assertTrue('sample_filter' not in dir(vcf.Reader))
         # read output
         reader = vcf.Reader(buf)
         self.assertEqual(reader.samples, ['NA00001'])
-        print(dir(reader))
         rec = reader.next()
         self.assertEqual(len(rec.samples), 1)
 

From 0b0d8093fc951cdde37627ad0e7897e8ebf7ca3d Mon Sep 17 00:00:00 2001
From: Lenna Peterson <arklenna@gmail.com>
Date: Mon, 9 Jul 2012 16:20:46 -0400
Subject: [PATCH 020/168] Changed tests to use subprocess returncode.

---
 vcf/test/test_vcf.py | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index ab07f2e..63f740c 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -636,16 +636,19 @@ def testOpenFilenameGzipped(self):
 
 class TestSampleFilter(unittest.TestCase):
     def testCLIListSamples(self):
-        s, out = commands.getstatusoutput('python scripts/vcf_sample_filter.py vcf/test/example-4.1.vcf')
-        self.assertEqual(s, 0)
-        expected_out = """Samples:
-0: NA00001
-1: NA00002
-2: NA00003"""
-        self.assertEqual(out, expected_out)
+        proc = subprocess.Popen('python scripts/vcf_sample_filter.py vcf/test/example-4.1.vcf', shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        out, err = proc.communicate()
+        self.assertEqual(proc.returncode, 0)
+        self.assertFalse(err)
+        expected_out = ['Samples:', '0: NA00001', '1: NA00002', '2: NA00003']
+        self.assertEqual(out.splitlines(), expected_out)
 
     def testCLIWithFilter(self):
-        out = subprocess.Popen('python scripts/vcf_sample_filter.py vcf/test/example-4.1.vcf -f 1,2', shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()[0]
+        proc = subprocess.Popen('python scripts/vcf_sample_filter.py vcf/test/example-4.1.vcf -f 1,2 --quiet', shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        out, err = proc.communicate()
+        self.assertEqual(proc.returncode, 0)
+        self.assertTrue(out)
+        self.assertFalse(err)
         buf = StringIO()
         buf.write(out)
         buf.seek(0)

From 746ece940739e93aa45d43ca0101432d5a74df19 Mon Sep 17 00:00:00 2001
From: Lenna Peterson <arklenna@gmail.com>
Date: Mon, 9 Jul 2012 16:40:25 -0400
Subject: [PATCH 021/168] Destructor undoes patch; warn if 0 samples kept

---
 vcf/sample_filter.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/vcf/sample_filter.py b/vcf/sample_filter.py
index 6e5e66e..8470158 100644
--- a/vcf/sample_filter.py
+++ b/vcf/sample_filter.py
@@ -54,6 +54,12 @@ def filt(self, samples, *args):
             self.set_filters()
             self.write()
 
+    def __del__(self):
+        try:
+            self.undo_monkey_patch()
+        except AttributeError:
+            pass
+
     def set_filters(self, filters=None, invert=False):
         """Convert filters from string to list of indices, set on Reader"""
         if filters is not None:
@@ -86,6 +92,8 @@ def filt2idx(item):
 
         # `sample_filter` setter updates `samples`
         self.parser.sample_filter = filters
+        if len(self.parser.samples) == 0:
+            warnings.warn("Number of samples to keep is zero", RuntimeWarning)
         logging.info("Keeping these samples: {0}\n".format(self.parser.samples))
         return self.parser.samples
 

From 30321c502710a5b9735737b803c59df56806955e Mon Sep 17 00:00:00 2001
From: Lenna Peterson <arklenna@gmail.com>
Date: Mon, 9 Jul 2012 16:47:41 -0400
Subject: [PATCH 022/168] Recommend explicit use of del.

---
 vcf/sample_filter.py | 8 ++++----
 vcf/test/test_vcf.py | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/vcf/sample_filter.py b/vcf/sample_filter.py
index 8470158..2a80c5d 100644
--- a/vcf/sample_filter.py
+++ b/vcf/sample_filter.py
@@ -13,8 +13,8 @@
 class SampleFilter(object):
     """
     Modifies the vcf Reader to filter each row by sample as it is parsed.
-    When using the class, be sure to call `undo_monkey_patch()` to restore
-    the original functionality to the Reader.
+    After using this class, call del on its instance to remove filtering
+    and restore the original functionality to the Reader.
 
     """
 
@@ -56,7 +56,7 @@ def filt(self, samples, *args):
 
     def __del__(self):
         try:
-            self.undo_monkey_patch()
+            self._undo_monkey_patch()
         except AttributeError:
             pass
 
@@ -111,6 +111,6 @@ def write(self, outfile=None):
         for row in self.parser:
             writer.write_record(row)
 
-    def undo_monkey_patch(self):
+    def _undo_monkey_patch(self):
         Reader._parse_samples = self._orig_parse_samples
         delattr(Reader, 'sample_filter')
diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index 63f740c..3bdee43 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -670,8 +670,8 @@ def testSampleFilterModule(self):
         filt.write(buf)
         buf.seek(0)
         #print(buf.getvalue())
-        # undo monkey patch
-        filt.undo_monkey_patch()
+        # undo monkey patch by destroying instance
+        del filt
         self.assertTrue('sample_filter' not in dir(vcf.Reader))
         # read output
         reader = vcf.Reader(buf)

From 49f889731b69ea9a42e2425743cfe2d910d35bab Mon Sep 17 00:00:00 2001
From: Lenna Peterson <arklenna@gmail.com>
Date: Mon, 9 Jul 2012 17:01:01 -0400
Subject: [PATCH 023/168] Added empty filter list; del is now less critical.

---
 vcf/sample_filter.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/vcf/sample_filter.py b/vcf/sample_filter.py
index 2a80c5d..b156b45 100644
--- a/vcf/sample_filter.py
+++ b/vcf/sample_filter.py
@@ -13,8 +13,6 @@
 class SampleFilter(object):
     """
     Modifies the vcf Reader to filter each row by sample as it is parsed.
-    After using this class, call del on its instance to remove filtering
-    and restore the original functionality to the Reader.
 
     """
 
@@ -39,6 +37,7 @@ def filt(self, samples, *args):
 
         # Add property to Reader for filter list
         Reader.sample_filter = property(get_filter, set_filter)
+        Reader._samp_filter = []
         # Modify Reader._parse_samples to filter samples
         self._orig_parse_samples = Reader._parse_samples
         Reader._parse_samples = filter_samples(Reader._parse_samples)

From e63960ccdc065f8c439105dc9609d1ba91adaa95 Mon Sep 17 00:00:00 2001
From: James Casbon <casbon@gmail.com>
Date: Tue, 27 Nov 2012 08:38:33 +0000
Subject: [PATCH 024/168] apply 0.6.0 release which seemed to get committed off
 of a branch

---
 docs/HISTORY.rst | 10 ++++++++++
 vcf/__init__.py  |  2 +-
 vcf/model.py     | 42 +++++++++++++++++++++++++++---------------
 vcf/test/prof.py |  2 +-
 4 files changed, 39 insertions(+), 17 deletions(-)

diff --git a/docs/HISTORY.rst b/docs/HISTORY.rst
index 396ffa7..085e24c 100644
--- a/docs/HISTORY.rst
+++ b/docs/HISTORY.rst
@@ -17,6 +17,16 @@ New features should have test code sent with them.
 Changes
 =======
 
+0.6.0 Release
+-------------
+
+* Backwards incompatible change: _Call.data is now a 
+  namedtuple (previously it was a dict)
+* Optional cython version, much improved performance.  
+* Improvements to writer (thanks @cmclean)
+* Improvements to inheritance of classes (thanks @lennax)
+
+
 0.5.0 Release
 -------------
 
diff --git a/vcf/__init__.py b/vcf/__init__.py
index 2935c73..f7aa7ca 100644
--- a/vcf/__init__.py
+++ b/vcf/__init__.py
@@ -177,4 +177,4 @@
 from vcf.filters import Base as Filter
 from vcf.parser import RESERVED_INFO, RESERVED_FORMAT
 
-VERSION = '0.5.0'
+VERSION = '0.6.0'
diff --git a/vcf/model.py b/vcf/model.py
index 9a27f87..9748784 100644
--- a/vcf/model.py
+++ b/vcf/model.py
@@ -2,6 +2,7 @@
 import collections
 import sys
 
+
 class _Call(object):
     """ A genotype call, a cell entry in a VCF file"""
 
@@ -20,7 +21,7 @@ def __init__(self, site, sample, data):
             self.called = self.gt_nums is not None
         except AttributeError:
             self.gt_nums = None
-            # FIXME how do we know if a non GT call is called?
+            #62 a call without a genotype is not defined as called or not
             self.called = None
 
     def __repr__(self):
@@ -70,10 +71,14 @@ def gt_type(self):
         if self.called:
             alleles = self.gt_alleles
             if all(X == alleles[0] for X in alleles[1:]):
-                if alleles[0] == "0": return 0
-                else: return 2
-            else: return 1
-        else: return None
+                if alleles[0] == "0":
+                    return 0
+                else:
+                    return 2
+            else:
+                return 1
+        else:
+            return None
 
     @property
     def phased(self):
@@ -145,7 +150,7 @@ def __str__(self):
         return "Record(CHROM=%(CHROM)s, POS=%(POS)s, REF=%(REF)s, ALT=%(ALT)s)" % self.__dict__
 
     def __cmp__(self, other):
-        return cmp( (self.CHROM, self.POS), (other.CHROM, other.POS))
+        return cmp((self.CHROM, self.POS), (other.CHROM, other.POS))
 
     def add_format(self, fmt):
         self.FORMAT = self.FORMAT + ':' + fmt
@@ -199,7 +204,6 @@ def aaf(self):
         # skip if more than one alternate allele. assumes bi-allelic
         if len(self.ALT) > 1:
             return None
-        hom_ref = self.num_hom_ref
         het = self.num_het
         hom_alt = self.num_hom_alt
         num_chroms = float(2.0 * self.num_called)
@@ -244,7 +248,8 @@ def get_unknowns(self):
     @property
     def is_snp(self):
         """ Return whether or not the variant is a SNP """
-        if len(self.REF) > 1: return False
+        if len(self.REF) > 1:
+            return False
         for alt in self.ALT:
             if alt is None or alt.type != "SNV":
                 return False
@@ -257,7 +262,8 @@ def is_indel(self):
         """ Return whether or not the variant is an INDEL """
         is_sv = self.is_sv
 
-        if len(self.REF) > 1 and not is_sv: return True
+        if len(self.REF) > 1 and not is_sv:
+            return True
         for alt in self.ALT:
             if alt is None:
                 return True
@@ -284,7 +290,8 @@ def is_sv(self):
     def is_transition(self):
         """ Return whether or not the SNP is a transition """
         # if multiple alts, it is unclear if we have a transition
-        if len(self.ALT) > 1: return False
+        if len(self.ALT) > 1:
+            return False
 
         if self.is_snp:
             # just one alt allele
@@ -294,14 +301,17 @@ def is_transition(self):
                 (self.REF == "C" and alt_allele == "T") or
                 (self.REF == "T" and alt_allele == "C")):
                 return True
-            else: return False
-        else: return False
+            else:
+                return False
+        else:
+            return False
 
     @property
     def is_deletion(self):
         """ Return whether or not the INDEL is a deletion """
         # if multiple alts, it is unclear if we have a transition
-        if len(self.ALT) > 1: return False
+        if len(self.ALT) > 1:
+            return False
 
         if self.is_indel:
             # just one alt allele
@@ -310,8 +320,10 @@ def is_deletion(self):
                 return True
             if len(self.REF) > len(alt_allele):
                 return True
-            else: return False
-        else: return False
+            else:
+                return False
+        else:
+            return False
 
     @property
     def var_type(self):
diff --git a/vcf/test/prof.py b/vcf/test/prof.py
index 62c72fe..953d169 100755
--- a/vcf/test/prof.py
+++ b/vcf/test/prof.py
@@ -1,4 +1,4 @@
-import vcf
+import vcf as vcf
 import cProfile
 import timeit
 import pstats

From fb835a2a3023116e8477412949eb10d7459f6a39 Mon Sep 17 00:00:00 2001
From: Marco Falcioni <marcofalcioni@yahoo.com>
Date: Wed, 14 Nov 2012 11:48:29 -0800
Subject: [PATCH 025/168] Changed the rule to split records into columns

According to the specification, the columns must be tab-separated. I encountered a VCF file from NCBI that has spaces in the INFO column, which caused PyVCF to fail.
http://www.1000genomes.org/wiki/Analysis/Variant%20Call%20Format/vcf-variant-call-format-version-41
---
 vcf/parser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vcf/parser.py b/vcf/parser.py
index f274e9c..6d938b6 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -437,7 +437,7 @@ def _parse_alt(self, str):
     def next(self):
         '''Return the next record in the file.'''
         line = self.reader.next()
-        row = re.split('\t| +', line)
+        row = re.split('\t+', line)
         chrom = row[0]
         if self._prepend_chr:
             chrom = 'chr' + chrom

From b6c085b74ce5c2acd6a785452e6f0f9062b1789d Mon Sep 17 00:00:00 2001
From: James Casbon <casbon@gmail.com>
Date: Tue, 27 Nov 2012 08:22:53 +0000
Subject: [PATCH 026/168] add strict whitespace option to allow for well formed
 VCFs with spaces in sample names.

---
 vcf/parser.py | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/vcf/parser.py b/vcf/parser.py
index 6d938b6..96ddba1 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -172,12 +172,19 @@ def read_meta(self, meta_string):
 class Reader(object):
     """ Reader for a VCF v 4.0 file, an iterator returning ``_Record objects`` """
 
-    def __init__(self, fsock=None, filename=None, compressed=False, prepend_chr=False):
+    def __init__(self, fsock=None, filename=None, compressed=False, prepend_chr=False,
+                 strict_whitespace=False):
         """ Create a new Reader for a VCF file.
 
             You must specify either fsock (stream) or filename.  Gzipped streams
             or files are attempted to be recogized by the file extension, or gzipped
             can be forced with ``compressed=True``
+
+            'prepend_chr=True' will put 'chr' before all the CHROM values, useful
+            for different sources.
+
+            'strict_whitespace=True' will split records on tabs only (as with VCF
+            spec) which allows you to parse files with spaces in the sample names.
         """
         super(Reader, self).__init__()
 
@@ -218,6 +225,11 @@ def __init__(self, fsock=None, filename=None, compressed=False, prepend_chr=Fals
         self._parse_metainfo()
         self._format_cache = {}
 
+        if strict_whitespace:
+            self._separator = '\t'
+        else:
+            self._separator = '\t| +'
+
     def __iter__(self):
         return self
 
@@ -437,7 +449,7 @@ def _parse_alt(self, str):
     def next(self):
         '''Return the next record in the file.'''
         line = self.reader.next()
-        row = re.split('\t+', line)
+        row = re.split(self._separator, line)
         chrom = row[0]
         if self._prepend_chr:
             chrom = 'chr' + chrom

From 3cd09d5c637d368a65916de70c7a8ba80d936f31 Mon Sep 17 00:00:00 2001
From: James Casbon <casbon@gmail.com>
Date: Tue, 27 Nov 2012 08:46:30 +0000
Subject: [PATCH 027/168] 0.6.1 release

---
 README.rst       | 16 ++++++++--------
 docs/HISTORY.rst | 10 ++++++++++
 vcf/__init__.py  |  2 +-
 3 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/README.rst b/README.rst
index 83792ce..52bd780 100644
--- a/README.rst
+++ b/README.rst
@@ -14,7 +14,7 @@ There main interface is the class: ``Reader``.  It takes a file-like
 object and acts as a reader::
 
     >>> import vcf
-    >>> vcf_reader = vcf.Reader(open('vcf/test/example-4.0.vcf', 'rb'))
+    >>> vcf_reader = vcf.Reader(open('vcf/test/example-4.0.vcf', 'r'))
     >>> for record in vcf_reader:
     ...     print record
     Record(CHROM=20, POS=14370, REF=G, ALT=[A])
@@ -49,7 +49,7 @@ one-entry Python lists (see, e.g., ``Record.ALT``).  Semicolon-delimited lists
 of key=value pairs are converted to Python dictionaries, with flags being given
 a ``True`` value. Integers and floats are handled exactly as you'd expect::
 
-    >>> vcf_reader = vcf.Reader(open('vcf/test/example-4.0.vcf', 'rb'))
+    >>> vcf_reader = vcf.Reader(open('vcf/test/example-4.0.vcf', 'r'))
     >>> record = vcf_reader.next()
     >>> print record.POS
     14370
@@ -68,7 +68,7 @@ examine properties of interest::
     >>> print record.nucl_diversity, record.aaf
     0.6 0.5
     >>> print record.get_hets()
-    [Call(sample=NA00002, GT=1|0, HQ=[51, 51], DP=8, GQ=48)]
+    [Call(sample=NA00002, CallData(GT=1|0, GQ=48, DP=8, HQ=[51, 51]))]
     >>> print record.is_snp, record.is_indel, record.is_transition, record.is_deletion
     True False True False
     >>> print record.var_type, record.var_subtype
@@ -101,7 +101,7 @@ call data in ``data``::
      >>> print call.sample
      NA00001
      >>> print call.data
-     {'GT': '0|0', 'HQ': [58, 50], 'DP': 3, 'GQ': 49}
+     CallData(GT=0|0, GQ=49, DP=3, HQ=[58, 50])
 
 Please note that as of release 0.4.0, attributes known to have single values (such as
 ``DP`` and ``GQ`` above) are returned as values.  Other attributes are returned
@@ -134,7 +134,7 @@ For example::
 
 ALT records are actually classes, so that you can interrogate them::
 
-    >>> reader = vcf.Reader(file('vcf/test/example-4.1-bnd.vcf'))
+    >>> reader = vcf.Reader(open('vcf/test/example-4.1-bnd.vcf'))
     >>> _ = reader.next(); row = reader.next()
     >>> print row
     Record(CHROM=1, POS=2, REF=T, ALT=[T[2:3[])
@@ -146,14 +146,14 @@ Random access is supported for files with tabix indexes.  Simply call fetch for
 region you are interested in::
 
     >>> vcf_reader = vcf.Reader(filename='vcf/test/tb.vcf.gz')
-    >>> for record in vcf_reader.fetch('20', 1110696, 1230237):
+    >>> for record in vcf_reader.fetch('20', 1110696, 1230237):  # doctest: +SKIP
     ...     print record
     Record(CHROM=20, POS=1110696, REF=A, ALT=[G, T])
     Record(CHROM=20, POS=1230237, REF=T, ALT=[None])
 
 Or extract a single row::
 
-    >>> print vcf_reader.fetch('20', 1110696)
+    >>> print vcf_reader.fetch('20', 1110696)  # doctest: +SKIP
     Record(CHROM=20, POS=1110696, REF=A, ALT=[G, T])
 
 
@@ -161,7 +161,7 @@ The ``Writer`` class provides a way of writing a VCF file.  Currently, you must
 template ``Reader`` which provides the metadata::
 
     >>> vcf_reader = vcf.Reader(filename='vcf/test/tb.vcf.gz')
-    >>> vcf_writer = vcf.Writer(file('/dev/null', 'w'), vcf_reader)
+    >>> vcf_writer = vcf.Writer(open('/dev/null', 'w'), vcf_reader)
     >>> for record in vcf_reader:
     ...     vcf_writer.write_record(record)
 
diff --git a/docs/HISTORY.rst b/docs/HISTORY.rst
index 085e24c..1e61871 100644
--- a/docs/HISTORY.rst
+++ b/docs/HISTORY.rst
@@ -17,6 +17,16 @@ New features should have test code sent with them.
 Changes
 =======
 
+0.6.1 Release
+-------------
+
+* Add strict whitespace mode for well formed VCFs with spaces 
+  in sample names (thanks Marco)
+* Ignore blank lines in files (thanks Martijn)
+* Tweaks for handling missing data (thanks Sean)
+* bcftools tests (thanks Martijn)
+* record.FILTER is always a list
+
 0.6.0 Release
 -------------
 
diff --git a/vcf/__init__.py b/vcf/__init__.py
index f7aa7ca..a8be533 100644
--- a/vcf/__init__.py
+++ b/vcf/__init__.py
@@ -177,4 +177,4 @@
 from vcf.filters import Base as Filter
 from vcf.parser import RESERVED_INFO, RESERVED_FORMAT
 
-VERSION = '0.6.0'
+VERSION = '0.6.1'

From f554810ea2510dc969fd2dcc8776433d06a08348 Mon Sep 17 00:00:00 2001
From: chapmanb <chapmanb@50mail.com>
Date: Mon, 3 Dec 2012 08:09:34 -0500
Subject: [PATCH 028/168] Allow flexibility in parsing INFO values specified as
 integers in the header: also allow float values.

---
 vcf/parser.py | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/vcf/parser.py b/vcf/parser.py
index 96ddba1..4d54e74 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -310,7 +310,12 @@ def _parse_info(self, info_str):
 
             if entry_type == 'Integer':
                 vals = entry[1].split(',')
-                val = self._map(int, vals)
+                try:
+                    val = self._map(int, vals)
+                # Allow specified integers to be flexibly parsed as floats.
+                # Handles cases with incorrectly specified header types.
+                except ValueError:
+                    val = self._map(float, vals)
             elif entry_type == 'Float':
                 vals = entry[1].split(',')
                 val = self._map(float, vals)
@@ -392,7 +397,10 @@ def _parse_samples(self, samples, samp_fmt, site):
                 if entry_num == 1 or ',' not in vals:
 
                     if entry_type == 'Integer':
-                        sampdat[i] = int(vals)
+                        try:
+                            sampdat[i] = int(vals)
+                        except ValueError:
+                            sampdat[i] = float(vals)
                     elif entry_type == 'Float':
                         sampdat[i] = float(vals)
                     else:
@@ -406,7 +414,10 @@ def _parse_samples(self, samples, samp_fmt, site):
                 vals = vals.split(',')
 
                 if entry_type == 'Integer':
-                    sampdat[i] = _map(int, vals)
+                    try:
+                        sampdat[i] = _map(int, vals)
+                    except ValueError:
+                        sampdat[i] = _map(float, vals)
                 elif entry_type == 'Float' or entry_type == 'Numeric':
                     sampdat[i] = _map(float, vals)
                 else:

From b79302060ba68732870c123abe15936331518ee2 Mon Sep 17 00:00:00 2001
From: Sean Davis <seandavi@gmail.com>
Date: Wed, 5 Dec 2012 12:11:06 -0500
Subject: [PATCH 029/168] Fixes #78

---
 vcf/parser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vcf/parser.py b/vcf/parser.py
index 4d54e74..51e8af1 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -328,7 +328,7 @@ def _parse_info(self, info_str):
                     val = True
 
             try:
-                if self.infos[ID].num == 1 and entry_type != 'String':
+                if self.infos[ID].num == 1 and entry_type not in ( 'String', 'Flag'):
                     val = val[0]
             except KeyError:
                 pass

From c957aab97a2dec018fe18e24010aa3a1ce11c2ba Mon Sep 17 00:00:00 2001
From: James Casbon <casbon@gmail.com>
Date: Thu, 6 Dec 2012 21:06:01 +0000
Subject: [PATCH 030/168] 0.6.2 version bump

---
 tox.ini         | 1 +
 vcf/__init__.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/tox.ini b/tox.ini
index 16847bb..771e15f 100644
--- a/tox.ini
+++ b/tox.ini
@@ -15,6 +15,7 @@ commands =
 deps =
     argparse
     ordereddict
+    cython
     pysam
 
 [testenv:py27]
diff --git a/vcf/__init__.py b/vcf/__init__.py
index a8be533..cdd1545 100644
--- a/vcf/__init__.py
+++ b/vcf/__init__.py
@@ -177,4 +177,4 @@
 from vcf.filters import Base as Filter
 from vcf.parser import RESERVED_INFO, RESERVED_FORMAT
 
-VERSION = '0.6.1'
+VERSION = '0.6.2'

From 95fd749220d3cf3dedda77624f7e9f1e544c01b7 Mon Sep 17 00:00:00 2001
From: James Casbon <casbon@gmail.com>
Date: Thu, 6 Dec 2012 21:07:23 +0000
Subject: [PATCH 031/168] history update

---
 docs/HISTORY.rst | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/docs/HISTORY.rst b/docs/HISTORY.rst
index 1e61871..3147631 100644
--- a/docs/HISTORY.rst
+++ b/docs/HISTORY.rst
@@ -17,6 +17,11 @@ New features should have test code sent with them.
 Changes
 =======
 
+0.6.2 Release
+-------------
+
+* issues #78, #79 (thanks Sean, Brad) 
+
 0.6.1 Release
 -------------
 

From 8acaeb3a151a18385748d82ea1fa7310d0a9da4e Mon Sep 17 00:00:00 2001
From: chapmanb <chapmanb@50mail.com>
Date: Wed, 26 Dec 2012 07:51:19 -0500
Subject: [PATCH 032/168] Correctly format contig output lines from writer,
 making output VCFs compatible with GATK. Fixes #74

---
 vcf/parser.py        | 6 +++++-
 vcf/test/test_vcf.py | 6 +++++-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/vcf/parser.py b/vcf/parser.py
index 51e8af1..97a202d 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -549,12 +549,16 @@ def __init__(self, stream, template, lineterminator="\r\n"):
 
         two = '##{key}=<ID={0},Description="{1}">\n'
         four = '##{key}=<ID={0},Number={num},Type={2},Description="{3}">\n'
+        contig_format = '##contig=<ID={ID},length={length},assembly={assembly}>\n'
         _num = self._fix_field_count
         for (key, vals) in template.metadata.iteritems():
             if key in SINGULAR_METADATA:
                 vals = [vals]
             for val in vals:
-                stream.write('##{0}={1}\n'.format(key, val))
+                if key == "contig":
+                    stream.write(contig_format.format(**val))
+                else:
+                    stream.write('##{0}={1}\n'.format(key, val))
         for line in template.infos.itervalues():
             stream.write(four.format(key="INFO", *line, num=_num(line.num)))
         for line in template.formats.itervalues():
diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index e4c3426..a88f4ef 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -218,7 +218,11 @@ def testWrite(self):
         for record in records:
             writer.write_record(record)
         out.seek(0)
-        print (out.getvalue())
+        out_str = out.getvalue()
+        for line in out_str.split("\n"):
+            if line.startswith("##contig"):
+                assert "<ID=" in line, "Found dictionary in contig line: {0}".format(line)
+        print (out_str)
         reader2 = vcf.Reader(out)
 
         self.assertEquals(reader.samples, reader2.samples)

From 9d43fa917033f20ac2a95635582ec915586f4125 Mon Sep 17 00:00:00 2001
From: Martijn Vermaat <martijn@vermaat.name>
Date: Thu, 10 Jan 2013 16:35:40 +0100
Subject: [PATCH 033/168] Correctly write meta lines with dictionary value

Write meta lines with a dictionary-like value as

    ##meta=<field=value,field=value,...>

instead of as the Python dictionary string representation. This is a
fix for jamescasbon#83 and a generalization of jamescasbon#81. A
regression compared to jamescasbon#81 is that the order of fields in
a `contig` line is no longer defined.
---
 vcf/parser.py        |  7 ++++---
 vcf/test/test_vcf.py | 24 +++++++++++++++++++++++-
 2 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/vcf/parser.py b/vcf/parser.py
index 97a202d..4dd18f4 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -549,14 +549,15 @@ def __init__(self, stream, template, lineterminator="\r\n"):
 
         two = '##{key}=<ID={0},Description="{1}">\n'
         four = '##{key}=<ID={0},Number={num},Type={2},Description="{3}">\n'
-        contig_format = '##contig=<ID={ID},length={length},assembly={assembly}>\n'
         _num = self._fix_field_count
         for (key, vals) in template.metadata.iteritems():
             if key in SINGULAR_METADATA:
                 vals = [vals]
             for val in vals:
-                if key == "contig":
-                    stream.write(contig_format.format(**val))
+                if isinstance(val, dict):
+                    values = ','.join('{0}={1}'.format(key, value)
+                                      for key, value in val.items())
+                    stream.write('##{0}=<{1}>\n'.format(key, values))
                 else:
                     stream.write('##{0}={1}\n'.format(key, val))
         for line in template.infos.itervalues():
diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index a88f4ef..73a2c22 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -221,7 +221,7 @@ def testWrite(self):
         out_str = out.getvalue()
         for line in out_str.split("\n"):
             if line.startswith("##contig"):
-                assert "<ID=" in line, "Found dictionary in contig line: {0}".format(line)
+                assert line.startswith('##contig=<'), "Found dictionary in contig line: {0}".format(line)
         print (out_str)
         reader2 = vcf.Reader(out)
 
@@ -257,6 +257,27 @@ def testWrite(self):
             self.assertEquals(l.samples, r.samples)
 
 
+class TestWriterDictionaryMeta(unittest.TestCase):
+
+    def testWrite(self):
+
+        reader = vcf.Reader(fh('example-4.1-bnd.vcf'))
+        out = StringIO()
+        writer = vcf.Writer(out, reader)
+
+        records = list(reader)
+
+        for record in records:
+            writer.write_record(record)
+        out.seek(0)
+        out_str = out.getvalue()
+        for line in out_str.split("\n"):
+            if line.startswith("##PEDIGREE"):
+                assert line.startswith('##PEDIGREE=<'), "Found dictionary in meta line: {0}".format(line)
+            if line.startswith("##SAMPLE"):
+                assert line.startswith('##SAMPLE=<'), "Found dictionary in meta line: {0}".format(line)
+
+
 class TestRecord(unittest.TestCase):
 
     def test_num_calls(self):
@@ -789,6 +810,7 @@ def test_trim(self):
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestBcfToolsOutput))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestGatkOutputWriter))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestBcfToolsOutputWriter))
+suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestWriterDictionaryMeta))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestTabix))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestOpenMethods))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestFilter))

From 1225561706d241e7fe5acf8eb9b67da70cc134d0 Mon Sep 17 00:00:00 2001
From: Martijn Vermaat <martijn@vermaat.name>
Date: Thu, 10 Jan 2013 23:46:50 +0100
Subject: [PATCH 034/168] Preserve order in meta lines with dictionary value

---
 vcf/parser.py        | 2 +-
 vcf/test/test_vcf.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/vcf/parser.py b/vcf/parser.py
index 4dd18f4..cbec08d 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -158,7 +158,7 @@ def read_meta_hash(self, meta_string):
         # Removing initial hash marks and final equal sign
         key = items[0][2:-1]
         hashItems = items[1].split(',')
-        val = dict(item.split("=") for item in hashItems)
+        val = OrderedDict(item.split("=") for item in hashItems)
         return key, val
 
     def read_meta(self, meta_string):
diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index 73a2c22..be060c0 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -273,7 +273,7 @@ def testWrite(self):
         out_str = out.getvalue()
         for line in out_str.split("\n"):
             if line.startswith("##PEDIGREE"):
-                assert line.startswith('##PEDIGREE=<'), "Found dictionary in meta line: {0}".format(line)
+                self.assertEquals(line, '##PEDIGREE=<Derived="Tumor",Original="Germline">')
             if line.startswith("##SAMPLE"):
                 assert line.startswith('##SAMPLE=<'), "Found dictionary in meta line: {0}".format(line)
 

From 3256c66306b8432eac1ffc7f004f13aa551564d1 Mon Sep 17 00:00:00 2001
From: James Casbon <casbon@gmail.com>
Date: Wed, 16 Jan 2013 11:08:36 +0000
Subject: [PATCH 035/168] add missing cparse implementation of #79

---
 vcf/cparse.pyx | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/vcf/cparse.pyx b/vcf/cparse.pyx
index 4a473d7..682e6a7 100644
--- a/vcf/cparse.pyx
+++ b/vcf/cparse.pyx
@@ -48,7 +48,10 @@ def parse_samples(
             if entry_num == 1 or ',' not in vals:
 
                 if entry_type == INTEGER:
-                    sampdat[j] = int(vals)
+                    try:
+                        sampdat[j] = int(vals)
+                    except ValueError:
+                        sampdat[j] = float(vals)
                 elif entry_type == FLOAT or entry_type == NUMERIC:
                     sampdat[j] = float(vals)
                 else:
@@ -62,7 +65,10 @@ def parse_samples(
             vals = vals.split(',')
 
             if entry_type == INTEGER:
-                sampdat[j] = _map(int, vals)
+                try:
+                    sampdat[j] = _map(int, vals)
+                except ValueError:
+                    sampdat[j] = map(float, vals)
             elif entry_type == FLOAT or entry_type == NUMERIC:
                 sampdat[j] = _map(float, vals)
             else:

From 6a64d4b2e27821e77ce495f5899a059415001b0f Mon Sep 17 00:00:00 2001
From: James Casbon <casbon@gmail.com>
Date: Wed, 16 Jan 2013 11:40:59 +0000
Subject: [PATCH 036/168] version bump to 0.6.3

---
 .gitignore       | 1 +
 docs/HISTORY.rst | 6 ++++++
 vcf/__init__.py  | 2 +-
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index b9e4fea..a18ec95 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,3 +10,4 @@ docs/_build
 .DS_Store
 vcf/cparse.c
 vcf/cparse.so
+.coverage
diff --git a/docs/HISTORY.rst b/docs/HISTORY.rst
index 3147631..fc3f2b3 100644
--- a/docs/HISTORY.rst
+++ b/docs/HISTORY.rst
@@ -17,6 +17,12 @@ New features should have test code sent with them.
 Changes
 =======
 
+0.6.3 Release
+-------------
+
+* cython port of #79
+* correct writing of meta lines #84 
+
 0.6.2 Release
 -------------
 
diff --git a/vcf/__init__.py b/vcf/__init__.py
index cdd1545..7ab38ee 100644
--- a/vcf/__init__.py
+++ b/vcf/__init__.py
@@ -177,4 +177,4 @@
 from vcf.filters import Base as Filter
 from vcf.parser import RESERVED_INFO, RESERVED_FORMAT
 
-VERSION = '0.6.2'
+VERSION = '0.6.3'

From 53548b6a73220e328cb0d891ad25ecf0f1eab4d0 Mon Sep 17 00:00:00 2001
From: James Casbon <james.casbon@popgentech.com>
Date: Thu, 17 Jan 2013 10:01:18 +0000
Subject: [PATCH 037/168] Update .travis.yml

Fix the travis build, hopefully
---
 .travis.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index cdbf63a..47b1002 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -6,7 +6,7 @@ python:
   - "3.2"
   - "pypy"
 install:
-  - "if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install --use-mirrors pysam argparse ordereddict; fi"
-  - "if [[ $TRAVIS_PYTHON_VERSION == '2.7' ]]; then pip install --use-mirrors pysam; fi"
+  - "if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install --use-mirrors cython && pip install --use-mirrors pysam argparse ordereddict; fi"
+  - "if [[ $TRAVIS_PYTHON_VERSION == '2.7' ]]; then pip install --use-mirrors cython && pip install --use-mirrors pysam; fi"
   - python setup.py install
 script: python setup.py test

From 4ce6aff3f6930660bcfe52cdf0e08ff53a9ec969 Mon Sep 17 00:00:00 2001
From: Alistair Miles <alimanfoo@googlemail.com>
Date: Mon, 28 Jan 2013 12:33:52 +0000
Subject: [PATCH 038/168] handle String INFO fields with multiple values

---
 vcf/parser.py                                 |  5 +++--
 vcf/test/example-4.1-info-multiple-values.vcf |  7 +++++++
 vcf/test/test_vcf.py                          | 16 ++++++++++++++++
 3 files changed, 26 insertions(+), 2 deletions(-)
 create mode 100644 vcf/test/example-4.1-info-multiple-values.vcf

diff --git a/vcf/parser.py b/vcf/parser.py
index cbec08d..6d42f88 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -323,12 +323,13 @@ def _parse_info(self, info_str):
                 val = True
             elif entry_type == 'String':
                 try:
-                    val = entry[1]
+                    vals = entry[1].split(',') # commas are reserved characters indicating multiple values
+                    val = self._map(str, vals)
                 except IndexError:
                     val = True
 
             try:
-                if self.infos[ID].num == 1 and entry_type not in ( 'String', 'Flag'):
+                if self.infos[ID].num == 1 and entry_type not in ( 'Flag', ):
                     val = val[0]
             except KeyError:
                 pass
diff --git a/vcf/test/example-4.1-info-multiple-values.vcf b/vcf/test/example-4.1-info-multiple-values.vcf
new file mode 100644
index 0000000..6faf95e
--- /dev/null
+++ b/vcf/test/example-4.1-info-multiple-values.vcf
@@ -0,0 +1,7 @@
+##fileformat=VCFv4.1
+##contig=<ID=Pf3D7_01_v3,length=640851>
+##INFO=<ID=RepeatCopies,Number=.,Type=Float,Description="Number of copies aligned with the consensus pattern">
+##INFO=<ID=RepeatSize,Number=.,Type=Integer,Description="Size of consensus pattern (may differ slightly from the period size)">
+##INFO=<ID=RepeatConsensus,Number=.,Type=String,Description="Repeat consensus sequence">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT
+Pf3D7_01_v3	401	.	C	T	53.99	PASS	RepeatCopies=19.3,47.4,14.0;RepeatSize=42,14,56;RepeatConsensus=TCTTATCTTCTTACTTTTCATTCCTTACTCTTACTTACTTAC,TTACTCTTACTTAC,TTACTCTTACTTACTTACTCTTACTTACTTACTCTTACTTACTTACTCTTATCTTC	
diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index be060c0..072cfd2 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -571,6 +571,22 @@ def test_qual(self):
             self.assertEqual(expected, qual)
             self.assertEqual(type(expected), qtype)
 
+    def test_info_multiple_values(self):
+        reader = vcf.Reader(fh('example-4.1-info-multiple-values.vcf'))
+        var = reader.next()
+        # check Float type INFO field with multiple values
+        expected = [19.3, 47.4, 14.0]
+        actual = var.INFO['RepeatCopies']
+        self.assertEqual(expected, actual)
+        # check Integer type INFO field with multiple values
+        expected = [42, 14, 56]
+        actual = var.INFO['RepeatSize']
+        self.assertEqual(expected, actual)
+        # check String type INFO field with multiple values
+        expected = ['TCTTATCTTCTTACTTTTCATTCCTTACTCTTACTTACTTAC', 'TTACTCTTACTTAC', 'TTACTCTTACTTACTTACTCTTACTTACTTACTCTTACTTACTTACTCTTATCTTC']
+        actual = var.INFO['RepeatConsensus']
+        self.assertEqual(expected, actual)
+
 
 class TestCall(unittest.TestCase):
 

From 3540bb7feb13e21cb993770966d5d86992d95a44 Mon Sep 17 00:00:00 2001
From: bow <bow@bow.web.id>
Date: Wed, 30 Jan 2013 17:33:34 +0100
Subject: [PATCH 039/168] Update writer unit tests to test call data equality

Samples written by the writer should have the exact same data before and after
they are parsed. Previously this was not tested, since call data equality
testing only checks for the sample name, genotype, and record (and not other
data fields).
---
 vcf/test/test_vcf.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index be060c0..5bf1e6b 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -232,6 +232,11 @@ def testWrite(self):
         for l, r in zip(records, reader2):
             self.assertEquals(l.samples, r.samples)
 
+            # test for call data equality, since equality on the sample calls
+            # may not always mean their data are all equal
+            for l_call, r_call in zip(l.samples, r.samples):
+                self.assertEqual(l_call.data, r_call.data)
+
 
 class TestBcfToolsOutputWriter(unittest.TestCase):
 
@@ -256,6 +261,11 @@ def testWrite(self):
         for l, r in zip(records, reader2):
             self.assertEquals(l.samples, r.samples)
 
+            # test for call data equality, since equality on the sample calls
+            # may not always mean their data are all equal
+            for l_call, r_call in zip(l.samples, r.samples):
+                self.assertEqual(l_call.data, r_call.data)
+
 
 class TestWriterDictionaryMeta(unittest.TestCase):
 

From e3e54843fb6b0b88b97a4d5ea936d9b64e69a2d6 Mon Sep 17 00:00:00 2001
From: bow <bow@bow.web.id>
Date: Tue, 29 Jan 2013 01:21:13 +0100
Subject: [PATCH 040/168] Fix bug that removes sample data when GT field is not
 present

Some programs (e.g. bcftools) may output VCF files whose samples have no GT
field value. When dealing with files like these, PyVCF's writer previously
removed all non-GT data and replaced it with './.', since the `_format_sample`
function returned immediately upon failing to find GT data.

This fix addresses the issue, so that the Writer keeps any non-GT data intact.
---
 vcf/parser.py | 27 ++++++++++++++++++++++++---
 1 file changed, 24 insertions(+), 3 deletions(-)

diff --git a/vcf/parser.py b/vcf/parser.py
index cbec08d..a2e7eaa 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -620,9 +620,30 @@ def _format_info(self, info):
         return ';'.join([self._stringify_pair(x,y) for x, y in info.iteritems()])
 
     def _format_sample(self, fmt, sample):
-        if getattr(sample.data, 'GT', None) is None:
-            return "./."
-        return ':'.join([self._stringify(x) for x in sample.data])
+        try:
+            # Try to get the GT value first.
+            gt = getattr(sample.data, 'GT')
+            # PyVCF stores './.' GT values as None, so we need to revert it back
+            # to './.' when writing.
+            if gt is None:
+                gt = './.'
+        except AttributeError:
+            # Failing that, try to check whether 'GT' is specified in the FORMAT
+            # field. If yes, use the recommended empty value ('./.')
+            if 'GT' in fmt:
+                gt = './.'
+            # Otherwise use an empty string as the value
+            else:
+                gt = ''
+        # If gt is an empty string (i.e. not stored), write all other data
+        if not gt:
+            return ':'.join([self._stringify(x) for x in sample.data])
+        # Otherwise use the GT values from above and combine it with the rest of
+        # the data.
+        # Note that this follows the VCF spec, where GT is always the first
+        # item whenever it is present.
+        else:
+            return ':'.join([gt] + [self._stringify(x) for x in sample.data[1:]])
 
     def _stringify(self, x, none='.', delim=','):
         if type(x) == type([]):

From 10b26fc4d5733d7e7f97336009449ec12160c1c2 Mon Sep 17 00:00:00 2001
From: Martijn Vermaat <martijn@vermaat.name>
Date: Tue, 26 Feb 2013 16:44:07 +0100
Subject: [PATCH 041/168] Record with empty list of samples instead of None

---
 vcf/model.py           |   2 +-
 vcf/test/1kg.sites.vcf | 200 +++++++++++++++++++++++++++++++++++++++++
 vcf/test/test_vcf.py   |  12 +++
 3 files changed, 213 insertions(+), 1 deletion(-)
 create mode 100644 vcf/test/1kg.sites.vcf

diff --git a/vcf/model.py b/vcf/model.py
index 9748784..7d28506 100644
--- a/vcf/model.py
+++ b/vcf/model.py
@@ -133,7 +133,7 @@ def __init__(self, CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO, FORMAT,
         self.alleles = [self.REF]
         self.alleles.extend(self.ALT)
         #: list of ``_Calls`` for each sample ordered as in source VCF
-        self.samples = samples
+        self.samples = samples or []
         self._sample_indexes = sample_indexes
 
     def __eq__(self, other):
diff --git a/vcf/test/1kg.sites.vcf b/vcf/test/1kg.sites.vcf
new file mode 100644
index 0000000..857a944
--- /dev/null
+++ b/vcf/test/1kg.sites.vcf
@@ -0,0 +1,200 @@
+##fileformat=VCFv4.1
+##INFO=<ID=LDAF,Number=1,Type=Float,Description="MLE Allele Frequency Accounting for LD">
+##INFO=<ID=AVGPOST,Number=1,Type=Float,Description="Average posterior probability from MaCH/Thunder">
+##INFO=<ID=RSQ,Number=1,Type=Float,Description="Genotype imputation quality from MaCH/Thunder">
+##INFO=<ID=ERATE,Number=1,Type=Float,Description="Per-marker Mutation rate from MaCH/Thunder">
+##INFO=<ID=THETA,Number=1,Type=Float,Description="Per-marker Transition rate from MaCH/Thunder">
+##INFO=<ID=CIEND,Number=2,Type=Integer,Description="Confidence interval around END for imprecise variants">
+##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants">
+##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record">
+##INFO=<ID=HOMLEN,Number=.,Type=Integer,Description="Length of base pair identical micro-homology at event breakpoints">
+##INFO=<ID=HOMSEQ,Number=.,Type=String,Description="Sequence of base pair identical micro-homology at event breakpoints">
+##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Difference in length between REF and ALT alleles">
+##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
+##INFO=<ID=AC,Number=.,Type=Integer,Description="Alternate Allele Count">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total Allele Count">
+##ALT=<ID=DEL,Description="Deletion">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=DS,Number=1,Type=Float,Description="Genotype dosage from MaCH/Thunder">
+##FORMAT=<ID=GL,Number=.,Type=Float,Description="Genotype Likelihoods">
+##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele, ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/pilot_data/technical/reference/ancestral_alignments/README">
+##INFO=<ID=AF,Number=1,Type=Float,Description="Global Allele Frequency based on AC/AN">
+##INFO=<ID=AMR_AF,Number=1,Type=Float,Description="Allele Frequency for samples from AMR based on AC/AN">
+##INFO=<ID=ASN_AF,Number=1,Type=Float,Description="Allele Frequency for samples from ASN based on AC/AN">
+##INFO=<ID=AFR_AF,Number=1,Type=Float,Description="Allele Frequency for samples from AFR based on AC/AN">
+##INFO=<ID=EUR_AF,Number=1,Type=Float,Description="Allele Frequency for samples from EUR based on AC/AN">
+##INFO=<ID=VT,Number=1,Type=String,Description="indicates what type of variant the line represents">
+##INFO=<ID=SNPSOURCE,Number=.,Type=String,Description="indicates if a snp was called when analysing the low coverage or exome alignment data">
+##reference=GRCh37
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
+1	10583	rs58108140	G	A	100	PASS	AVGPOST=0.7707;RSQ=0.4319;LDAF=0.2327;ERATE=0.0161;AN=2184;VT=SNP;AA=.;THETA=0.0046;AC=314;SNPSOURCE=LOWCOV;AF=0.14;ASN_AF=0.13;AMR_AF=0.17;AFR_AF=0.04;EUR_AF=0.21
+1	10611	rs189107123	C	G	100	PASS	AN=2184;THETA=0.0077;VT=SNP;AA=.;AC=41;ERATE=0.0048;SNPSOURCE=LOWCOV;AVGPOST=0.9330;LDAF=0.0479;RSQ=0.3475;AF=0.02;ASN_AF=0.01;AMR_AF=0.03;AFR_AF=0.01;EUR_AF=0.02
+1	13302	rs180734498	C	T	100	PASS	THETA=0.0048;AN=2184;AC=249;VT=SNP;AA=.;RSQ=0.6281;LDAF=0.1573;SNPSOURCE=LOWCOV;AVGPOST=0.8895;ERATE=0.0058;AF=0.11;ASN_AF=0.02;AMR_AF=0.08;AFR_AF=0.21;EUR_AF=0.14
+1	13327	rs144762171	G	C	100	PASS	AVGPOST=0.9698;AN=2184;VT=SNP;AA=.;RSQ=0.6482;AC=59;SNPSOURCE=LOWCOV;ERATE=0.0012;LDAF=0.0359;THETA=0.0204;AF=0.03;ASN_AF=0.02;AMR_AF=0.03;AFR_AF=0.02;EUR_AF=0.04
+1	13957	rs201747181	TC	T	28	PASS	AA=TC;AC=35;AF=0.02;AFR_AF=0.02;AMR_AF=0.02;AN=2184;ASN_AF=0.01;AVGPOST=0.8711;ERATE=0.0065;EUR_AF=0.02;LDAF=0.0788;RSQ=0.2501;THETA=0.0100;VT=INDEL
+1	13980	rs151276478	T	C	100	PASS	AN=2184;AC=45;ERATE=0.0034;THETA=0.0139;RSQ=0.3603;LDAF=0.0525;VT=SNP;AA=.;AVGPOST=0.9221;SNPSOURCE=LOWCOV;AF=0.02;ASN_AF=0.02;AMR_AF=0.02;AFR_AF=0.01;EUR_AF=0.02
+1	30923	rs140337953	G	T	100	PASS	AC=1584;AA=T;AN=2184;RSQ=0.5481;VT=SNP;THETA=0.0162;SNPSOURCE=LOWCOV;ERATE=0.0183;LDAF=0.6576;AVGPOST=0.7335;AF=0.73;ASN_AF=0.89;AMR_AF=0.80;AFR_AF=0.48;EUR_AF=0.73
+1	46402	rs199681827	C	CTGT	31	PASS	AA=.;AC=8;AF=0.0037;AFR_AF=0.01;AN=2184;ASN_AF=0.0017;AVGPOST=0.8325;ERATE=0.0072;LDAF=0.0903;RSQ=0.0960;THETA=0.0121;VT=INDEL
+1	47190	rs200430748	G	GA	192	PASS	AA=G;AC=29;AF=0.01;AFR_AF=0.06;AMR_AF=0.0028;AN=2184;AVGPOST=0.9041;ERATE=0.0041;LDAF=0.0628;RSQ=0.2883;THETA=0.0153;VT=INDEL
+1	51476	rs187298206	T	C	100	PASS	ERATE=0.0021;AA=C;AC=18;AN=2184;VT=SNP;THETA=0.0103;LDAF=0.0157;SNPSOURCE=LOWCOV;AVGPOST=0.9819;RSQ=0.5258;AF=0.01;ASN_AF=0.01;AMR_AF=0.01;AFR_AF=0.01;EUR_AF=0.01
+1	51479	rs116400033	T	A	100	PASS	RSQ=0.7414;AVGPOST=0.9085;AA=T;AN=2184;THETA=0.0131;AC=235;VT=SNP;LDAF=0.1404;SNPSOURCE=LOWCOV;ERATE=0.0012;AF=0.11;ASN_AF=0.0035;AMR_AF=0.16;AFR_AF=0.03;EUR_AF=0.22
+1	51914	rs190452223	T	G	100	PASS	ERATE=0.0004;AVGPOST=0.9985;THETA=0.0159;AA=T;AN=2184;VT=SNP;SNPSOURCE=LOWCOV;AC=1;RSQ=0.4089;LDAF=0.0012;AF=0.0005;ASN_AF=0.0017
+1	51935	rs181754315	C	T	100	PASS	THETA=0.0126;AA=C;AN=2184;RSQ=0.1888;AVGPOST=0.9972;LDAF=0.0015;VT=SNP;AC=0;SNPSOURCE=LOWCOV;ERATE=0.0006;AF=0
+1	51954	rs185832753	G	C	100	PASS	LDAF=0.0021;AA=G;AN=2184;RSQ=0.4692;AVGPOST=0.9975;VT=SNP;SNPSOURCE=LOWCOV;THETA=0.0029;ERATE=0.0006;AC=2;AF=0.0009;AMR_AF=0.01
+1	52058	rs62637813	G	C	100	PASS	AA=C;ERATE=0.0057;AN=2184;AVGPOST=0.9264;VT=SNP;RSQ=0.4882;AC=64;SNPSOURCE=LOWCOV;LDAF=0.0620;THETA=0.0069;AF=0.03;ASN_AF=0.0017;AMR_AF=0.04;AFR_AF=0.02;EUR_AF=0.05
+1	52144	rs190291950	T	A	100	PASS	THETA=0.0093;ERATE=0.0013;LDAF=0.0156;AA=T;AN=2184;VT=SNP;RSQ=0.5220;AVGPOST=0.9811;SNPSOURCE=LOWCOV;AC=21;AF=0.01;ASN_AF=0.0035;AMR_AF=0.01;AFR_AF=0.01;EUR_AF=0.01
+1	52185	rs201374420	TTAA	T	244	PASS	AA=.;AC=10;AF=0.0046;AFR_AF=0.0020;AMR_AF=0.02;AN=2184;ASN_AF=0.0035;AVGPOST=0.9840;ERATE=0.0037;LDAF=0.0124;RSQ=0.4271;THETA=0.0232;VT=INDEL
+1	52238	rs150021059	T	G	100	PASS	THETA=0.0132;AA=G;AN=2184;RSQ=0.6256;VT=SNP;ERATE=0.0026;AVGPOST=0.8617;SNPSOURCE=LOWCOV;AC=1941;LDAF=0.8423;AF=0.89;ASN_AF=0.99;AMR_AF=0.93;AFR_AF=0.64;EUR_AF=0.95
+1	53234	rs199502715	CAT	C	227	PASS	AA=CAT;AC=10;AF=0.0046;AFR_AF=0.02;AMR_AF=0.0028;AN=2184;AVGPOST=0.9936;ERATE=0.0007;LDAF=0.0074;RSQ=0.6237;THETA=0.0119;VT=INDEL
+1	54353	rs140052487	C	A	100	PASS	THETA=0.0026;AA=C;AN=2184;AC=16;VT=SNP;RSQ=0.5074;SNPSOURCE=LOWCOV;AVGPOST=0.9844;LDAF=0.0146;ERATE=0.0058;AF=0.01;ASN_AF=0.01;AMR_AF=0.0028;AFR_AF=0.02;EUR_AF=0.0013
+1	54421	rs146477069	A	G	100	PASS	ERATE=0.0013;AN=2184;AC=220;VT=SNP;RSQ=0.7869;AVGPOST=0.9461;AA=A;THETA=0.0025;SNPSOURCE=LOWCOV;LDAF=0.1190;AF=0.10;ASN_AF=0.25;AMR_AF=0.12;AFR_AF=0.03;EUR_AF=0.02
+1	54490	rs141149254	G	A	100	PASS	ERATE=0.0004;THETA=0.0074;AA=G;AN=2184;VT=SNP;RSQ=0.8366;AVGPOST=0.9646;AC=175;SNPSOURCE=LOWCOV;LDAF=0.0929;AF=0.08;ASN_AF=0.0035;AMR_AF=0.12;AFR_AF=0.03;EUR_AF=0.15
+1	54676	rs2462492	C	T	100	PASS	LDAF=0.1528;RSQ=0.6989;AA=T;AN=2184;AC=267;VT=SNP;AVGPOST=0.8998;SNPSOURCE=LOWCOV;THETA=0.0110;ERATE=0.0037;AF=0.12;ASN_AF=0.02;AMR_AF=0.20;AFR_AF=0.09;EUR_AF=0.18
+1	54753	rs143174675	T	G	100	PASS	AA=T;AN=2184;RSQ=0.6820;AC=65;VT=SNP;THETA=0.0080;ERATE=0.0016;SNPSOURCE=LOWCOV;AVGPOST=0.9697;LDAF=0.0399;AF=0.03;AMR_AF=0.04;AFR_AF=0.07;EUR_AF=0.03
+1	55164	rs3091274	C	A	100	PASS	AN=2184;VT=SNP;ERATE=0.0045;AA=A;THETA=0.0162;SNPSOURCE=LOWCOV;AC=1955;RSQ=0.6373;AVGPOST=0.8686;LDAF=0.8489;AF=0.90;ASN_AF=0.99;AMR_AF=0.94;AFR_AF=0.65;EUR_AF=0.96
+1	55249	rs200769871	C	CTATGG	443	PASS	AA=C;AC=151;AF=0.07;AFR_AF=0.03;AMR_AF=0.08;AN=2184;ASN_AF=0.16;AVGPOST=0.9073;ERATE=0.0063;EUR_AF=0.02;LDAF=0.0968;RSQ=0.5891;THETA=0.0038;VT=INDEL
+1	55299	rs10399749	C	T	100	PASS	RSQ=0.7602;LDAF=0.2954;AN=2184;VT=SNP;ERATE=0.0051;AA=c;AC=554;SNPSOURCE=LOWCOV;AVGPOST=0.8845;THETA=0.0070;AF=0.25;ASN_AF=0.33;AMR_AF=0.21;AFR_AF=0.39;EUR_AF=0.13
+1	55313	rs182462964	A	T	100	PASS	ERATE=0.0004;RSQ=0.6112;AVGPOST=0.9994;AN=2184;VT=SNP;THETA=0.0057;AA=A;SNPSOURCE=LOWCOV;AC=1;LDAF=0.0008;AF=0.0005;AFR_AF=0.0020
+1	55326	rs3107975	T	C	100	PASS	AA=C;ERATE=0.0074;AN=2184;THETA=0.0085;VT=SNP;SNPSOURCE=LOWCOV;AVGPOST=0.9622;AC=90;RSQ=0.6901;LDAF=0.0562;AF=0.04;ASN_AF=0.07;AMR_AF=0.02;AFR_AF=0.07;EUR_AF=0.01
+1	55330	rs185215913	G	A	100	PASS	ERATE=0.0005;AA=G;AN=2184;VT=SNP;THETA=0.0086;AVGPOST=0.9988;LDAF=0.0011;SNPSOURCE=LOWCOV;AC=1;RSQ=0.4701;AF=0.0005;AFR_AF=0.0020
+1	55367	rs190850374	G	A	100	PASS	ERATE=0.0004;THETA=0.0044;AA=G;AN=2184;VT=SNP;LDAF=0.0029;RSQ=0.3860;SNPSOURCE=LOWCOV;AVGPOST=0.9961;AC=2;AF=0.0009;AMR_AF=0.01
+1	55388	rs182711216	C	T	100	PASS	THETA=0.0102;ERATE=0.0005;AA=C;AVGPOST=0.9983;AN=2184;LDAF=0.0010;VT=SNP;RSQ=0.2348;SNPSOURCE=LOWCOV;AC=1;AF=0.0005;ASN_AF=0.0017
+1	55394	rs2949420	T	A	100	PASS	AC=18;AN=2184;VT=SNP;AA=A;RSQ=0.4995;AVGPOST=0.9784;LDAF=0.0171;SNPSOURCE=LOWCOV;ERATE=0.0012;THETA=0.0063;AF=0.01;AMR_AF=0.01;AFR_AF=0.0041;EUR_AF=0.02
+1	55416	rs193242050	G	A	100	PASS	AA=G;AN=2184;AVGPOST=0.9944;VT=SNP;LDAF=0.0064;AC=9;THETA=0.0019;RSQ=0.6553;SNPSOURCE=LOWCOV;ERATE=0.0006;AF=0.0041;AFR_AF=0.02
+1	55427	rs183189405	T	C	100	PASS	THETA=0.0054;AA=T;AN=2184;VT=SNP;AVGPOST=0.9969;LDAF=0.0020;SNPSOURCE=LOWCOV;AC=1;RSQ=0.2759;ERATE=0.0007;AF=0.0005;AFR_AF=0.0020
+1	55816	rs187434873	G	A	100	PASS	AN=2184;THETA=0.0119;VT=SNP;AC=10;RSQ=0.4578;AA=A;SNPSOURCE=LOWCOV;AVGPOST=0.9844;LDAF=0.0108;ERATE=0.0007;AF=0.0046;AMR_AF=0.01;EUR_AF=0.01
+1	55850	rs191890754	C	G	100	PASS	AVGPOST=0.9921;AA=G;AN=2184;VT=SNP;RSQ=0.4083;THETA=0.0045;LDAF=0.0056;AC=5;SNPSOURCE=LOWCOV;ERATE=0.0006;AF=0.0023;EUR_AF=0.01
+1	55852	rs184233019	G	C	100	PASS	THETA=0.0137;AA=G;AN=2184;RSQ=0.5433;ERATE=0.0009;LDAF=0.0046;VT=SNP;AVGPOST=0.9953;AC=5;SNPSOURCE=LOWCOV;AF=0.0023;AMR_AF=0.01;EUR_AF=0.0013
+1	56644	rs143342222	A	C	100	PASS	AN=2184;AVGPOST=0.9962;LDAF=0.0040;ERATE=0.0024;VT=SNP;AA=A;RSQ=0.5700;AC=5;SNPSOURCE=LOWCOV;THETA=0.0117;AF=0.0023;AFR_AF=0.01
+1	57952	rs189727433	A	C	100	PASS	AA=C;ERATE=0.0085;AN=2184;LDAF=0.7878;VT=SNP;THETA=0.0076;RSQ=0.4712;AC=1902;SNPSOURCE=LOWCOV;AVGPOST=0.7578;AF=0.87;ASN_AF=0.98;AMR_AF=0.91;AFR_AF=0.64;EUR_AF=0.91
+1	58814	rs114420996	G	A	100	PASS	AC=223;THETA=0.0032;AA=G;AN=2184;RSQ=0.9087;LDAF=0.1074;VT=SNP;SNPSOURCE=LOWCOV;ERATE=0.0006;AVGPOST=0.9777;AF=0.10;ASN_AF=0.03;AMR_AF=0.17;AFR_AF=0.20;EUR_AF=0.06
+1	59040	rs149755937	T	C	100	PASS	AVGPOST=0.9710;AC=115;AA=T;AN=2184;RSQ=0.8248;VT=SNP;ERATE=0.0017;THETA=0.0025;SNPSOURCE=LOWCOV;LDAF=0.0613;AF=0.05;ASN_AF=0.03;AMR_AF=0.15;AFR_AF=0.0041;EUR_AF=0.06
+1	60726	rs192328835	C	A	100	PASS	AVGPOST=0.9092;AN=2184;RSQ=0.5988;ERATE=0.0081;AC=144;VT=SNP;THETA=0.0045;AA=A;SNPSOURCE=LOWCOV;LDAF=0.0959;AF=0.07;ASN_AF=0.05;AMR_AF=0.10;AFR_AF=0.11;EUR_AF=0.03
+1	61442	rs74970982	A	G	100	PASS	LDAF=0.9152;AA=G;AN=2184;VT=SNP;ERATE=0.0026;RSQ=0.4867;AVGPOST=0.9004;SNPSOURCE=LOWCOV;THETA=0.0013;AC=2084;AF=0.95;ASN_AF=1.00;AMR_AF=0.97;AFR_AF=0.84;EUR_AF=0.99
+1	61462	rs56992750	T	A	100	PASS	THETA=0.0023;LDAF=0.0378;RSQ=0.7396;AA=T;AN=2184;AVGPOST=0.9773;VT=SNP;AC=68;SNPSOURCE=LOWCOV;ERATE=0.0012;AF=0.03;AMR_AF=0.02;AFR_AF=0.13
+1	61743	rs184286948	G	C	100	PASS	AVGPOST=0.9939;LDAF=0.0047;AA=G;AN=2184;VT=SNP;ERATE=0.0011;SNPSOURCE=LOWCOV;AC=4;THETA=0.0016;RSQ=0.4838;AF=0.0018;AMR_AF=0.01;EUR_AF=0.0026
+1	61987	rs76735897	A	G	100	PASS	THETA=0.0015;AN=2184;AC=569;VT=SNP;AA=A;RSQ=0.7192;AVGPOST=0.8533;LDAF=0.2944;SNPSOURCE=LOWCOV;ERATE=0.0012;AF=0.26;ASN_AF=0.07;AMR_AF=0.31;AFR_AF=0.25;EUR_AF=0.39
+1	61989	rs77573425	G	C	100	PASS	RSQ=0.7254;AVGPOST=0.8584;AA=G;AN=2184;LDAF=0.2849;VT=SNP;AC=555;THETA=0.0019;SNPSOURCE=LOWCOV;ERATE=0.0007;AF=0.25;ASN_AF=0.07;AMR_AF=0.31;AFR_AF=0.22;EUR_AF=0.39
+1	61993	rs190553843	C	T	100	PASS	AC=7;RSQ=0.6106;AA=C;THETA=0.0143;AN=2184;ERATE=0.0009;VT=SNP;AVGPOST=0.9953;SNPSOURCE=LOWCOV;LDAF=0.0050;AF=0.0032;AFR_AF=0.01
+1	62156	rs181864839	C	T	100	PASS	ERATE=0.0005;AA=C;AN=2184;AVGPOST=0.9979;LDAF=0.0015;VT=SNP;THETA=0.0094;SNPSOURCE=LOWCOV;AC=1;RSQ=0.4561;AF=0.0005;AFR_AF=0.0020
+1	62157	rs10399597	G	A	100	PASS	AVGPOST=0.9945;AA=G;AN=2184;ERATE=0.0025;VT=SNP;RSQ=0.5217;AC=5;THETA=0.0066;SNPSOURCE=LOWCOV;LDAF=0.0050;AF=0.0023;AFR_AF=0.01
+1	62162	rs140556834	G	A	100	PASS	AA=G;AN=2184;AC=8;LDAF=0.0057;VT=SNP;THETA=0.0018;ERATE=0.0017;RSQ=0.6089;AVGPOST=0.9948;SNPSOURCE=LOWCOV;AF=0.0037;AMR_AF=0.0028;AFR_AF=0.01;EUR_AF=0.0013
+1	63276	rs185977555	G	A	100	PASS	RSQ=0.2744;AA=G;AN=2184;AVGPOST=0.9947;VT=SNP;ERATE=0.0010;SNPSOURCE=LOWCOV;AC=1;THETA=0.0010;LDAF=0.0031;AF=0.0005;AFR_AF=0.0020
+1	63297	rs188886746	G	A	100	PASS	ERATE=0.0005;AVGPOST=0.9986;AA=G;AN=2184;VT=SNP;AC=0;SNPSOURCE=LOWCOV;RSQ=0.2459;THETA=0.0024;LDAF=0.0008;AF=0
+1	63671	rs116440577	G	A	100	PASS	AA=G;AN=2184;ERATE=0.0047;LDAF=0.1773;VT=SNP;THETA=0.0072;AC=369;SNPSOURCE=LOWCOV;RSQ=0.8980;AVGPOST=0.9652;AF=0.17;ASN_AF=0.05;AMR_AF=0.22;AFR_AF=0.35;EUR_AF=0.11
+1	63735	rs201888535	CCTA	C	455	PASS	AA=CCTA;AC=829;AF=0.38;AFR_AF=0.13;AMR_AF=0.33;AN=2184;ASN_AF=0.69;AVGPOST=0.7654;ERATE=0.0047;EUR_AF=0.34;LDAF=0.4128;RSQ=0.6424;THETA=0.0062;VT=INDEL
+1	64649	rs181431124	A	C	100	PASS	RSQ=0.6975;AN=2184;VT=SNP;AA=.;ERATE=0.0008;AVGPOST=0.9918;SNPSOURCE=LOWCOV;AC=21;THETA=0.0024;LDAF=0.0114;AF=0.01;AMR_AF=0.01;EUR_AF=0.03
+1	66162	rs62639105	A	T	100	PASS	THETA=0.0026;ERATE=0.0166;LDAF=0.3089;AN=2184;VT=SNP;AA=.;AC=544;SNPSOURCE=LOWCOV;RSQ=0.5681;AVGPOST=0.7777;AF=0.25;ASN_AF=0.07;AMR_AF=0.30;AFR_AF=0.23;EUR_AF=0.38
+1	66176	rs28552463	T	A	100	PASS	AN=2184;RSQ=0.4451;VT=SNP;AA=.;THETA=0.0095;LDAF=0.0631;AC=70;SNPSOURCE=LOWCOV;ERATE=0.0061;AVGPOST=0.9210;AF=0.03;ASN_AF=0.0017;AMR_AF=0.01;AFR_AF=0.13;EUR_AF=0.0013
+1	66219	rs181028663	A	T	100	PASS	LDAF=0.1137;ERATE=0.0074;AN=2184;VT=SNP;AA=.;AC=68;THETA=0.0059;RSQ=0.2946;AVGPOST=0.8268;SNPSOURCE=LOWCOV;AF=0.03;ASN_AF=0.08;AMR_AF=0.04;AFR_AF=0.01;EUR_AF=0.01
+1	66331	rs186063952	A	C	100	PASS	THETA=0.0126;AVGPOST=0.7656;RSQ=0.1616;AN=2184;LDAF=0.1387;ERATE=0.0093;VT=SNP;AA=.;SNPSOURCE=LOWCOV;AC=42;AF=0.02;ASN_AF=0.0035;AMR_AF=0.01;AFR_AF=0.07
+1	66442	rs192044252	T	A	100	PASS	RSQ=0.1763;AVGPOST=0.7894;AN=2184;THETA=0.0031;VT=SNP;AA=.;SNPSOURCE=LOWCOV;AC=36;ERATE=0.0107;LDAF=0.1241;AF=0.02;ASN_AF=0.0035;AMR_AF=0.03;AFR_AF=0.02;EUR_AF=0.01
+1	66457	rs13328655	T	A	100	PASS	ERATE=0.0085;AN=2184;VT=SNP;AA=.;AC=31;AVGPOST=0.8340;LDAF=0.0957;RSQ=0.1836;SNPSOURCE=LOWCOV;THETA=0.0024;AF=0.01;ASN_AF=0.01;AMR_AF=0.01;AFR_AF=0.03;EUR_AF=0.01
+1	66507	rs12401368	T	A	100	PASS	ERATE=0.0197;AN=2184;VT=SNP;AA=.;THETA=0.0122;SNPSOURCE=LOWCOV;AC=170;RSQ=0.2110;LDAF=0.2457;AVGPOST=0.6536;AF=0.08;ASN_AF=0.07;AMR_AF=0.09;AFR_AF=0.05;EUR_AF=0.09
+1	67179	rs149952626	C	G	100	PASS	AVGPOST=0.9946;AN=2184;VT=SNP;AA=.;THETA=0.0046;SNPSOURCE=LOWCOV;ERATE=0.0012;AC=11;RSQ=0.6333;LDAF=0.0069;AF=0.01;ASN_AF=0.02
+1	67181	rs77662731	A	G	100	PASS	AVGPOST=0.9817;THETA=0.0096;ERATE=0.0013;AN=2184;RSQ=0.8542;AC=104;LDAF=0.0529;VT=SNP;AA=.;SNPSOURCE=LOWCOV;AF=0.05;AMR_AF=0.02;AFR_AF=0.20
+1	69511	rs75062661	A	G	100	PASS	LDAF=0.6051;AC=1424;ERATE=0.0237;AN=2184;RSQ=0.5669;VT=SNP;AA=.;AVGPOST=0.7173;SNPSOURCE=LOWCOV;THETA=0.0052;AF=0.65;ASN_AF=0.87;AMR_AF=0.65;AFR_AF=0.33;EUR_AF=0.70
+1	69534	rs190717287	T	C	100	PASS	AVGPOST=0.9986;LDAF=0.0013;AN=2184;VT=SNP;AA=.;SNPSOURCE=LOWCOV;AC=1;RSQ=0.4002;THETA=0.0016;ERATE=0.0006;AF=0.0005;ASN_AF=0.0017
+1	69536	rs200013390	C	T	100	PASS	AA=.;AC=0;AF=0;AN=2184;AVGPOST=0.9986;ERATE=0.0006;LDAF=0.0008;RSQ=0.0677;SNPSOURCE=EXOME;THETA=0.0087;VT=SNP
+1	72119	rs199639004	G	GTA	158	PASS	AA=.;AC=8;AF=0.0037;AMR_AF=0.0028;AN=2184;ASN_AF=0.01;AVGPOST=0.9589;ERATE=0.0026;EUR_AF=0.0013;LDAF=0.0243;RSQ=0.2268;THETA=0.0016;VT=INDEL
+1	72148	rs182862337	C	T	100	PASS	AN=2184;RSQ=0.2794;THETA=0.0130;VT=SNP;AA=.;LDAF=0.0019;AVGPOST=0.9971;SNPSOURCE=LOWCOV;AC=1;ERATE=0.0007;AF=0.0005;AMR_AF=0.0028
+1	72297	rs200651397	G	GTAT	160	PASS	AA=G;AC=19;AF=0.01;AMR_AF=0.02;AN=2184;ASN_AF=0.01;AVGPOST=0.9383;ERATE=0.0055;EUR_AF=0.01;LDAF=0.0399;RSQ=0.3194;THETA=0.0064;VT=INDEL
+1	73841	rs143773730	C	T	100	PASS	ERATE=0.0303;THETA=0.0044;AN=2184;AVGPOST=0.8178;RSQ=0.5832;VT=SNP;AA=.;SNPSOURCE=LOWCOV;LDAF=0.2588;AC=425;AF=0.19;ASN_AF=0.15;AMR_AF=0.22;AFR_AF=0.17;EUR_AF=0.23
+1	77462	rs188023513	G	A	100	PASS	LDAF=0.1685;AN=2184;AVGPOST=0.8149;VT=SNP;AA=.;RSQ=0.4624;AC=198;THETA=0.0100;SNPSOURCE=LOWCOV;ERATE=0.0222;AF=0.09;ASN_AF=0.11;AMR_AF=0.12;AFR_AF=0.08;EUR_AF=0.07
+1	77470	rs192898053	T	C	100	PASS	LDAF=0.0047;AN=2184;VT=SNP;AA=.;ERATE=0.0011;AVGPOST=0.9918;THETA=0.0025;RSQ=0.1818;SNPSOURCE=LOWCOV;AC=1;AF=0.0005;AFR_AF=0.0020
+1	77874	rs184538873	G	A	100	PASS	THETA=0.0068;LDAF=0.0516;AN=2184;VT=SNP;AA=.;AVGPOST=0.9594;ERATE=0.0011;AC=87;SNPSOURCE=LOWCOV;RSQ=0.6970;AF=0.04;ASN_AF=0.01;AMR_AF=0.12;AFR_AF=0.0041;EUR_AF=0.04
+1	77961	rs78385339	G	A	100	PASS	AVGPOST=0.9114;AN=2184;VT=SNP;AA=.;THETA=0.0072;RSQ=0.6667;ERATE=0.0011;SNPSOURCE=LOWCOV;AC=192;LDAF=0.1180;AF=0.09;ASN_AF=0.20;AMR_AF=0.14;AFR_AF=0.01;EUR_AF=0.03
+1	79033	rs62641298	A	G	100	PASS	AVGPOST=0.7371;THETA=0.0022;LDAF=0.7962;AN=2184;ERATE=0.0054;VT=SNP;AA=.;AC=1961;SNPSOURCE=LOWCOV;RSQ=0.3963;AF=0.90;ASN_AF=0.98;AMR_AF=0.95;AFR_AF=0.65;EUR_AF=0.97
+1	79050	rs62641299	G	T	100	PASS	AC=1871;AN=2184;THETA=0.0031;RSQ=0.3928;VT=SNP;AA=.;AVGPOST=0.6803;SNPSOURCE=LOWCOV;LDAF=0.7318;ERATE=0.0107;AF=0.86;ASN_AF=0.98;AMR_AF=0.93;AFR_AF=0.54;EUR_AF=0.94
+1	79137	rs143777184	A	T	100	PASS	AN=2184;AC=55;ERATE=0.0009;AVGPOST=0.9773;LDAF=0.0324;VT=SNP;AA=.;THETA=0.0091;SNPSOURCE=LOWCOV;RSQ=0.7309;AF=0.03;AMR_AF=0.01;AFR_AF=0.10
+1	79417	rs184768190	C	T	100	PASS	ERATE=0.0005;THETA=0.0166;AN=2184;RSQ=0.5026;AVGPOST=0.9975;VT=SNP;AA=.;LDAF=0.0022;SNPSOURCE=LOWCOV;AC=2;AF=0.0009;ASN_AF=0.0035
+1	79772	rs147215883	C	G	100	PASS	LDAF=0.1066;AN=2184;THETA=0.0138;RSQ=0.7199;VT=SNP;AA=.;AVGPOST=0.9271;AC=176;ERATE=0.0011;SNPSOURCE=LOWCOV;AF=0.08;ASN_AF=0.07;AMR_AF=0.06;AFR_AF=0.10;EUR_AF=0.09
+1	79872	rs189224661	T	G	100	PASS	THETA=0.0054;AN=2184;LDAF=0.0057;VT=SNP;AA=.;ERATE=0.0017;AC=9;AVGPOST=0.9956;SNPSOURCE=LOWCOV;RSQ=0.6548;AF=0.0041;AFR_AF=0.02
+1	80454	rs144226842	G	C	100	PASS	RSQ=0.6549;LDAF=0.0035;AN=2184;AVGPOST=0.9975;VT=SNP;AA=.;AC=5;SNPSOURCE=LOWCOV;THETA=0.0110;ERATE=0.0015;AF=0.0023;ASN_AF=0.01
+1	81949	rs181567186	T	C	100	PASS	AN=2184;ERATE=0.0009;VT=SNP;AA=.;AVGPOST=0.9948;LDAF=0.0030;SNPSOURCE=LOWCOV;AC=1;THETA=0.0052;RSQ=0.2129;AF=0.0005;ASN_AF=0.0017
+1	82163	rs139113303	G	A	100	PASS	AN=2184;LDAF=0.0375;ERATE=0.0009;VT=SNP;AA=.;RSQ=0.7842;AC=66;THETA=0.0053;SNPSOURCE=LOWCOV;AVGPOST=0.9761;AF=0.03;ASN_AF=0.0017;AMR_AF=0.01;AFR_AF=0.0020;EUR_AF=0.08
+1	82249	rs1851945	A	G	100	PASS	THETA=0.0137;LDAF=0.0712;AVGPOST=0.9150;AN=2184;VT=SNP;AA=.;RSQ=0.4689;AC=75;ERATE=0.0116;SNPSOURCE=LOWCOV;AF=0.03;ASN_AF=0.03;AMR_AF=0.04;AFR_AF=0.02;EUR_AF=0.04
+1	82609	rs149189449	C	G	100	PASS	ERATE=0.0005;AN=2184;LDAF=0.0364;VT=SNP;AA=.;AC=68;AVGPOST=0.9822;RSQ=0.8408;SNPSOURCE=LOWCOV;THETA=0.0024;AF=0.03;AMR_AF=0.02;AFR_AF=0.0020;EUR_AF=0.08
+1	82676	rs185237834	T	G	100	PASS	LDAF=0.1144;AN=2184;AVGPOST=0.9264;VT=SNP;AA=.;RSQ=0.7176;AC=198;THETA=0.0025;SNPSOURCE=LOWCOV;ERATE=0.0056;AF=0.09;ASN_AF=0.07;AMR_AF=0.08;AFR_AF=0.12;EUR_AF=0.10
+1	82734	rs4030331	T	C	100	PASS	AN=2184;THETA=0.0008;VT=SNP;AA=.;ERATE=0.0158;RSQ=0.6316;AVGPOST=0.8280;LDAF=0.2433;SNPSOURCE=LOWCOV;AC=435;AF=0.20;ASN_AF=0.15;AMR_AF=0.28;AFR_AF=0.24;EUR_AF=0.17
+1	82957	rs189774606	C	T	100	PASS	RSQ=0.5163;AN=2184;VT=SNP;AA=.;LDAF=0.0072;THETA=0.0028;AC=9;AVGPOST=0.9918;SNPSOURCE=LOWCOV;ERATE=0.0012;AF=0.0041;AMR_AF=0.01;AFR_AF=0.01
+1	83084	rs181193408	T	A	100	PASS	AN=2184;AVGPOST=0.8261;VT=SNP;AA=.;RSQ=0.5750;AC=1914;LDAF=0.8278;SNPSOURCE=LOWCOV;ERATE=0.0061;THETA=0.0064;AF=0.88;ASN_AF=0.99;AMR_AF=0.92;AFR_AF=0.58;EUR_AF=0.96
+1	83088	rs186081601	G	C	100	PASS	ERATE=0.0013;AN=2184;LDAF=0.0043;VT=SNP;AA=.;AVGPOST=0.9922;RSQ=0.1618;THETA=0.0019;SNPSOURCE=LOWCOV;AC=1;AF=0.0005;AFR_AF=0.0020
+1	83771	rs189906733	T	G	100	PASS	RSQ=0.6473;AN=2184;AVGPOST=0.9871;VT=SNP;AA=.;AC=24;ERATE=0.0011;SNPSOURCE=LOWCOV;THETA=0.0043;LDAF=0.0158;AF=0.01;AMR_AF=0.01;AFR_AF=0.04;EUR_AF=0.0013
+1	83977	rs180759811	A	G	100	PASS	AN=2184;ERATE=0.0009;VT=SNP;AA=.;THETA=0.0059;LDAF=0.0038;RSQ=0.2074;SNPSOURCE=LOWCOV;AC=1;AVGPOST=0.9932;AF=0.0005;AFR_AF=0.0020
+1	84002	rs28850140	G	A	100	PASS	THETA=0.0050;ERATE=0.0211;AN=2184;AC=236;VT=SNP;AA=.;AVGPOST=0.8144;LDAF=0.1921;SNPSOURCE=LOWCOV;RSQ=0.4810;AF=0.11;ASN_AF=0.12;AMR_AF=0.15;AFR_AF=0.07;EUR_AF=0.11
+1	84005	rs202079949	AG	A	78	PASS	AA=.;AC=52;AF=0.02;AFR_AF=0.02;AMR_AF=0.03;AN=2184;ASN_AF=0.01;AVGPOST=0.9360;ERATE=0.0049;EUR_AF=0.04;LDAF=0.0514;RSQ=0.4690;THETA=0.0005;VT=INDEL
+1	84010	rs186443818	G	A	100	PASS	AVGPOST=0.9169;AN=2184;VT=SNP;AA=.;AC=97;THETA=0.0087;LDAF=0.0789;SNPSOURCE=LOWCOV;ERATE=0.0061;RSQ=0.5318;AF=0.04;ASN_AF=0.03;AMR_AF=0.05;AFR_AF=0.03;EUR_AF=0.06
+1	84079	rs190867312	T	C	100	PASS	ERATE=0.0021;AN=2184;AC=6;VT=SNP;AA=.;LDAF=0.0049;AVGPOST=0.9956;RSQ=0.5906;SNPSOURCE=LOWCOV;THETA=0.0016;AF=0.0027;AMR_AF=0.0028;AFR_AF=0.01
+1	84139	rs183605470	A	T	100	PASS	THETA=0.0023;AC=28;AN=2184;RSQ=0.6469;VT=SNP;AA=.;LDAF=0.0180;SNPSOURCE=LOWCOV;AVGPOST=0.9835;ERATE=0.0006;AF=0.01;ASN_AF=0.0017;AMR_AF=0.07;AFR_AF=0.0041
+1	84156	rs188652299	A	C	100	PASS	THETA=0.0009;AVGPOST=0.9936;ERATE=0.0014;RSQ=0.3359;AN=2184;VT=SNP;AA=.;LDAF=0.0044;SNPSOURCE=LOWCOV;AC=3;AF=0.0014;AMR_AF=0.0028;AFR_AF=0.0041
+1	84244	rs191297051	A	C	100	PASS	LDAF=0.1204;AN=2184;VT=SNP;AA=.;AVGPOST=0.9398;RSQ=0.7828;THETA=0.0025;ERATE=0.0018;SNPSOURCE=LOWCOV;AC=222;AF=0.10;ASN_AF=0.08;AMR_AF=0.08;AFR_AF=0.14;EUR_AF=0.11
+1	84295	rs183209871	G	A	100	PASS	LDAF=0.0067;AVGPOST=0.9946;AN=2184;THETA=0.0038;VT=SNP;AA=.;AC=9;SNPSOURCE=LOWCOV;ERATE=0.0007;RSQ=0.6599;AF=0.0041;AMR_AF=0.01;EUR_AF=0.01
+1	84346	rs187855973	T	C	100	PASS	THETA=0.0044;AN=2184;AVGPOST=0.9981;VT=SNP;AA=.;LDAF=0.0014;SNPSOURCE=LOWCOV;AC=1;ERATE=0.0007;RSQ=0.3659;AF=0.0005;EUR_AF=0.0013
+1	84453	rs191379015	C	G	100	PASS	LDAF=0.0021;RSQ=0.2866;AN=2184;VT=SNP;AA=.;THETA=0.0018;ERATE=0.0008;SNPSOURCE=LOWCOV;AC=1;AVGPOST=0.9968;AF=0.0005;AMR_AF=0.0028
+1	84705	rs183470350	T	G	100	PASS	LDAF=0.0033;AVGPOST=0.9943;AN=2184;VT=SNP;AA=.;THETA=0.0030;RSQ=0.2658;SNPSOURCE=LOWCOV;AC=2;ERATE=0.0007;AF=0.0009;AMR_AF=0.0028;EUR_AF=0.0013
+1	85063	rs187802690	T	C	100	PASS	THETA=0.0093;AN=2184;VT=SNP;AA=.;ERATE=0.0051;LDAF=0.0255;RSQ=0.6868;AVGPOST=0.9806;AC=38;SNPSOURCE=LOWCOV;AF=0.02;ASN_AF=0.01;AMR_AF=0.02;AFR_AF=0.01;EUR_AF=0.02
+1	85597	rs192472955	A	C	100	PASS	AC=145;AVGPOST=0.9322;AN=2184;LDAF=0.0880;VT=SNP;AA=.;RSQ=0.6993;SNPSOURCE=LOWCOV;THETA=0.0020;ERATE=0.0022;AF=0.07;AMR_AF=0.07;AFR_AF=0.11;EUR_AF=0.09
+1	85622	rs185273034	A	T	100	PASS	ERATE=0.0005;AVGPOST=0.9963;AN=2184;RSQ=0.5194;VT=SNP;AA=.;THETA=0.0174;LDAF=0.0034;SNPSOURCE=LOWCOV;AC=4;AF=0.0018;AFR_AF=0.01
+1	85892	rs147185795	A	G	100	PASS	AVGPOST=0.9936;RSQ=0.7759;AN=2184;VT=SNP;AA=.;LDAF=0.0122;SNPSOURCE=LOWCOV;AC=21;THETA=0.0116;ERATE=0.0007;AF=0.01;AMR_AF=0.0028;AFR_AF=0.04
+1	86000	rs140628094	A	C	100	PASS	AN=2184;LDAF=0.0062;VT=SNP;AA=.;AC=10;THETA=0.0018;ERATE=0.0008;RSQ=0.7700;SNPSOURCE=LOWCOV;AVGPOST=0.9968;AF=0.0046;AFR_AF=0.02
+1	86018	rs142878000	C	G	100	PASS	ERATE=0.0036;RSQ=0.7867;AVGPOST=0.9429;AN=2184;AC=213;LDAF=0.1166;VT=SNP;AA=.;THETA=0.0030;SNPSOURCE=LOWCOV;AF=0.10;ASN_AF=0.08;AMR_AF=0.08;AFR_AF=0.12;EUR_AF=0.11
+1	86028	rs114608975	T	C	100	PASS	ERATE=0.0005;AC=73;AN=2184;RSQ=0.8713;VT=SNP;AA=.;THETA=0.0108;SNPSOURCE=LOWCOV;AVGPOST=0.9841;LDAF=0.0388;AF=0.03;AMR_AF=0.02;AFR_AF=0.0041;EUR_AF=0.08
+1	86064	rs190167736	G	A	100	PASS	ERATE=0.0004;AN=2184;VT=SNP;AA=.;THETA=0.0081;SNPSOURCE=LOWCOV;AC=1;RSQ=0.5628;AVGPOST=0.9992;LDAF=0.0008;AF=0.0005;AFR_AF=0.0020
+1	86065	rs116504101	G	C	100	PASS	ERATE=0.0005;LDAF=0.0398;AN=2184;AVGPOST=0.9846;VT=SNP;AA=.;AC=76;THETA=0.0057;RSQ=0.8725;SNPSOURCE=LOWCOV;AF=0.03;AMR_AF=0.02;AFR_AF=0.01;EUR_AF=0.09
+1	86282	rs192830046	T	G	100	PASS	LDAF=0.0036;AN=2184;VT=SNP;AA=.;SNPSOURCE=LOWCOV;ERATE=0.0012;RSQ=0.2764;AVGPOST=0.9941;AC=2;THETA=0.0034;AF=0.0009;AMR_AF=0.0028;EUR_AF=0.0013
+1	86303	rs2949417	G	T	100	PASS	THETA=0.0021;RSQ=0.8008;LDAF=0.1194;AN=2184;AC=214;VT=SNP;AA=.;AVGPOST=0.9465;SNPSOURCE=LOWCOV;ERATE=0.0007;AF=0.10;ASN_AF=0.08;AMR_AF=0.08;AFR_AF=0.12;EUR_AF=0.11
+1	86331	rs115209712	A	G	100	PASS	THETA=0.0047;AN=2184;VT=SNP;AA=.;AC=216;LDAF=0.1195;RSQ=0.8119;ERATE=0.0008;AVGPOST=0.9495;SNPSOURCE=LOWCOV;AF=0.10;ASN_AF=0.08;AMR_AF=0.08;AFR_AF=0.12;EUR_AF=0.11
+1	86982	rs184970101	G	A	100	PASS	THETA=0.0050;AN=2184;AVGPOST=0.9979;LDAF=0.0015;VT=SNP;AA=.;SNPSOURCE=LOWCOV;AC=1;ERATE=0.0006;RSQ=0.3541;AF=0.0005;AFR_AF=0.0020
+1	87021	rs188486692	T	C	100	PASS	AN=2184;RSQ=0.4348;VT=SNP;AA=.;THETA=0.0112;AVGPOST=0.9687;ERATE=0.0011;SNPSOURCE=LOWCOV;AC=19;LDAF=0.0221;AF=0.01;AMR_AF=0.01;AFR_AF=0.0041;EUR_AF=0.02
+1	87114	rs200095900	CT	C	192	PASS	AA=.;AC=8;AF=0.0037;AFR_AF=0.02;AN=2184;AVGPOST=0.9976;ERATE=0.0010;LDAF=0.0042;RSQ=0.7479;THETA=0.0149;VT=INDEL
+1	87190	rs1524602	G	A	100	PASS	AN=2184;LDAF=0.2822;VT=SNP;AA=.;RSQ=0.7549;THETA=0.0148;AC=540;SNPSOURCE=LOWCOV;ERATE=0.0096;AVGPOST=0.8739;AF=0.25;ASN_AF=0.29;AMR_AF=0.35;AFR_AF=0.38;EUR_AF=0.08
+1	87360	rs180907504	C	T	100	PASS	THETA=0.0014;AN=2184;ERATE=0.0025;RSQ=0.3869;VT=SNP;AA=.;AC=14;LDAF=0.0170;SNPSOURCE=LOWCOV;AVGPOST=0.9768;AF=0.01;ASN_AF=0.02;AMR_AF=0.0028;AFR_AF=0.01
+1	87409	rs139490478	C	T	100	PASS	AN=2184;AC=80;RSQ=0.8364;AVGPOST=0.9797;THETA=0.0075;VT=SNP;AA=.;ERATE=0.0011;SNPSOURCE=LOWCOV;LDAF=0.0438;AF=0.04;ASN_AF=0.0017;AMR_AF=0.02;AFR_AF=0.01;EUR_AF=0.09
+1	87590	rs185279164	G	A	100	PASS	THETA=0.0068;ERATE=0.0005;AN=2184;VT=SNP;AA=.;LDAF=0.0026;RSQ=0.6866;SNPSOURCE=LOWCOV;AC=4;AVGPOST=0.9982;AF=0.0018;AFR_AF=0.01
+1	87647	rs146836579	T	C	100	PASS	AN=2184;AC=111;THETA=0.0041;VT=SNP;AA=.;LDAF=0.0558;AVGPOST=0.9811;SNPSOURCE=LOWCOV;ERATE=0.0015;RSQ=0.8636;AF=0.05;AMR_AF=0.03;AFR_AF=0.20
+1	87755	rs140735660	G	A	100	PASS	ERATE=0.0027;RSQ=0.5060;AN=2184;AC=16;VT=SNP;AA=.;SNPSOURCE=LOWCOV;AVGPOST=0.9847;LDAF=0.0138;THETA=0.0069;AF=0.01;AFR_AF=0.03
+1	87970	rs189643077	T	C	100	PASS	ERATE=0.0005;AN=2184;RSQ=0.5846;VT=SNP;AA=.;AVGPOST=0.9976;THETA=0.0053;LDAF=0.0025;SNPSOURCE=LOWCOV;AC=3;AF=0.0014;AFR_AF=0.01
+1	87978	rs182297743	G	A	100	PASS	AVGPOST=0.9963;THETA=0.0074;AN=2184;VT=SNP;AA=.;LDAF=0.0023;RSQ=0.2883;SNPSOURCE=LOWCOV;AC=1;ERATE=0.0006;AF=0.0005;AMR_AF=0.0028
+1	88136	rs59529791	G	A	100	PASS	RSQ=0.8406;AN=2184;VT=SNP;AA=.;ERATE=0.0010;THETA=0.0059;AVGPOST=0.9778;SNPSOURCE=LOWCOV;AC=106;LDAF=0.0548;AF=0.05;AMR_AF=0.03;AFR_AF=0.20
+1	88169	rs940550	C	T	100	PASS	RSQ=0.7811;AN=2184;VT=SNP;AA=.;THETA=0.0055;LDAF=0.2576;ERATE=0.0018;SNPSOURCE=LOWCOV;AC=506;AVGPOST=0.8932;AF=0.23;ASN_AF=0.29;AMR_AF=0.33;AFR_AF=0.33;EUR_AF=0.08
+1	88172	rs940551	G	A	100	PASS	RSQ=0.7703;AVGPOST=0.9669;LDAF=0.0483;AN=2184;ERATE=0.0009;VT=SNP;AA=.;THETA=0.0027;SNPSOURCE=LOWCOV;AC=86;AF=0.04;ASN_AF=0.01;AMR_AF=0.03;AFR_AF=0.01;EUR_AF=0.09
+1	88177	rs143215837	G	C	100	PASS	ERATE=0.0004;AN=2184;LDAF=0.0456;VT=SNP;AA=.;AVGPOST=0.9686;AC=82;THETA=0.0089;SNPSOURCE=LOWCOV;RSQ=0.7787;AF=0.04;AMR_AF=0.03;AFR_AF=0.01;EUR_AF=0.09
+1	88188	rs148331237	C	A	100	PASS	THETA=0.0039;AN=2184;VT=SNP;AA=.;AC=9;LDAF=0.0085;SNPSOURCE=LOWCOV;ERATE=0.0007;RSQ=0.5212;AVGPOST=0.9910;AF=0.0041;AMR_AF=0.0028;EUR_AF=0.01
+1	88236	rs186918018	C	T	100	PASS	AVGPOST=0.9904;AN=2184;ERATE=0.0031;RSQ=0.5511;VT=SNP;AA=.;THETA=0.0087;LDAF=0.0097;SNPSOURCE=LOWCOV;AC=11;AF=0.01;AMR_AF=0.02;AFR_AF=0.0041;EUR_AF=0.0026
+1	88250	rs191950833	T	A	100	PASS	LDAF=0.0013;AN=2184;RSQ=0.1387;VT=SNP;AA=.;AC=0;THETA=0.0019;SNPSOURCE=LOWCOV;ERATE=0.0007;AVGPOST=0.9974;AF=0
+1	88316	rs113759966	G	A	100	PASS	LDAF=0.0531;AN=2184;VT=SNP;AA=.;THETA=0.0071;RSQ=0.7791;AVGPOST=0.9644;ERATE=0.0008;AC=87;SNPSOURCE=LOWCOV;AF=0.04;ASN_AF=0.0017;AMR_AF=0.04;AFR_AF=0.01;EUR_AF=0.09
+1	88324	rs183326616	A	G	100	PASS	ERATE=0.0004;AN=2184;AVGPOST=0.9996;VT=SNP;AA=.;RSQ=0.7073;LDAF=0.0006;SNPSOURCE=LOWCOV;AC=1;THETA=0.0092;AF=0.0005;AFR_AF=0.0020
+1	88338	rs55700207	G	A	100	PASS	THETA=0.0035;RSQ=0.7967;AN=2184;ERATE=0.0034;LDAF=0.1019;VT=SNP;AA=.;AC=186;SNPSOURCE=LOWCOV;AVGPOST=0.9507;AF=0.09;ASN_AF=0.03;AMR_AF=0.15;AFR_AF=0.14;EUR_AF=0.05
+1	88370	rs185487977	G	A	100	PASS	AVGPOST=0.9957;LDAF=0.0035;AN=2184;ERATE=0.0009;RSQ=0.4507;VT=SNP;AA=.;SNPSOURCE=LOWCOV;AC=4;THETA=0.0043;AF=0.0018;AMR_AF=0.0028;EUR_AF=0.0040
+1	88376	rs189954431	T	G	100	PASS	RSQ=0.6404;AVGPOST=0.9994;AN=2184;VT=SNP;AA=.;THETA=0.0057;SNPSOURCE=LOWCOV;AC=1;ERATE=0.0003;LDAF=0.0008;AF=0.0005;AFR_AF=0.0020
+1	88388	rs182344336	C	T	100	PASS	THETA=0.0048;ERATE=0.0005;RSQ=0.3843;AVGPOST=0.9977;LDAF=0.0016;AN=2184;VT=SNP;AA=.;SNPSOURCE=LOWCOV;AC=1;AF=0.0005;ASN_AF=0.0017
+1	88429	rs146027550	T	C	100	PASS	LDAF=0.0083;AN=2184;AC=13;RSQ=0.6097;VT=SNP;AA=.;ERATE=0.0010;AVGPOST=0.9922;SNPSOURCE=LOWCOV;THETA=0.0069;AF=0.01;AMR_AF=0.01;AFR_AF=0.02;EUR_AF=0.0013
+1	88710	rs186575039	C	G	100	PASS	ERATE=0.0005;AC=73;THETA=0.0022;AN=2184;VT=SNP;AA=.;AVGPOST=0.9774;LDAF=0.0389;SNPSOURCE=LOWCOV;RSQ=0.8058;AF=0.03;AMR_AF=0.01;AFR_AF=0.01;EUR_AF=0.08
+1	89165	rs192631277	A	C	100	PASS	RSQ=0.2647;AN=2184;ERATE=0.0009;VT=SNP;AA=.;AVGPOST=0.9969;LDAF=0.0020;SNPSOURCE=LOWCOV;AC=1;THETA=0.0043;AF=0.0005;ASN_AF=0.0017
+1	89744	rs184101761	A	G	100	PASS	THETA=0.0068;ERATE=0.0004;AVGPOST=0.9985;LDAF=0.0016;AN=2184;RSQ=0.5853;VT=SNP;AA=.;SNPSOURCE=LOWCOV;AC=2;AF=0.0009;AFR_AF=0.0041
+1	89794	rs188661839	T	C	100	PASS	AN=2184;THETA=0.0130;VT=SNP;AA=.;SNPSOURCE=LOWCOV;AC=1;ERATE=0.0006;RSQ=0.3812;AVGPOST=0.9984;LDAF=0.0012;AF=0.0005;AFR_AF=0.0020
+1	89946	rs138808727	A	T	100	PASS	RSQ=0.7414;LDAF=0.1417;AN=2184;ERATE=0.0009;AC=236;VT=SNP;AA=.;THETA=0.0100;AVGPOST=0.9001;SNPSOURCE=LOWCOV;AF=0.11;ASN_AF=0.0035;AMR_AF=0.16;AFR_AF=0.02;EUR_AF=0.22
+1	91190	rs143856811	G	A	100	PASS	AN=2184;AC=77;ERATE=0.0009;VT=SNP;AA=.;LDAF=0.0447;SNPSOURCE=LOWCOV;RSQ=0.7517;THETA=0.0113;AVGPOST=0.9690;AF=0.04;ASN_AF=0.0017;AMR_AF=0.02;AFR_AF=0.02;EUR_AF=0.08
+1	91228	rs139873689	A	G	100	PASS	AN=2184;AVGPOST=0.9924;AC=8;RSQ=0.5097;VT=SNP;AA=.;ERATE=0.0010;THETA=0.0012;SNPSOURCE=LOWCOV;LDAF=0.0070;AF=0.0037;AFR_AF=0.02
+1	91536	rs77418980	G	T	100	PASS	AC=695;AN=2184;ERATE=0.0025;AVGPOST=0.7792;VT=SNP;AA=.;THETA=0.0018;LDAF=0.3255;RSQ=0.6634;SNPSOURCE=LOWCOV;AF=0.32;ASN_AF=0.34;AMR_AF=0.30;AFR_AF=0.04;EUR_AF=0.50
+1	91581	rs151118460	G	A	100	PASS	AVGPOST=0.7763;THETA=0.0078;AN=2184;VT=SNP;AA=.;AC=716;ERATE=0.0035;LDAF=0.3353;RSQ=0.6618;SNPSOURCE=LOWCOV;AF=0.33;ASN_AF=0.37;AMR_AF=0.30;AFR_AF=0.04;EUR_AF=0.50
+1	91605	rs141083882	C	T	100	PASS	AC=105;LDAF=0.0597;AN=2184;RSQ=0.7792;VT=SNP;AA=.;ERATE=0.0010;AVGPOST=0.9660;SNPSOURCE=LOWCOV;THETA=0.0070;AF=0.05;AMR_AF=0.02;AFR_AF=0.20
+1	92633	rs149776517	C	T	100	PASS	THETA=0.0054;AN=2184;VT=SNP;AA=.;AC=44;AVGPOST=0.9592;ERATE=0.0008;LDAF=0.0366;SNPSOURCE=LOWCOV;RSQ=0.5870;AF=0.02;AMR_AF=0.02;EUR_AF=0.05
+1	92858	rs147061536	G	T	100	PASS	AC=248;THETA=0.0212;RSQ=0.7567;AN=2184;ERATE=0.0046;VT=SNP;AA=.;SNPSOURCE=LOWCOV;AVGPOST=0.9072;LDAF=0.1433;AF=0.11;ASN_AF=0.01;AMR_AF=0.15;AFR_AF=0.05;EUR_AF=0.22
+1	92875	rs193157612	T	C	100	PASS	THETA=0.0048;AVGPOST=0.9957;AN=2184;LDAF=0.0040;ERATE=0.0009;VT=SNP;AA=.;RSQ=0.4901;SNPSOURCE=LOWCOV;AC=4;AF=0.0018;EUR_AF=0.01
+1	94421	rs200856736	TC	T	90	PASS	AA=TC;AC=253;AF=0.12;AFR_AF=0.01;AMR_AF=0.20;AN=2184;ASN_AF=0.26;AVGPOST=0.7183;ERATE=0.0117;EUR_AF=0.03;LDAF=0.2244;RSQ=0.3175;THETA=0.0159;VT=INDEL
+1	94986	rs185004859	C	T	100	PASS	ERATE=0.0166;AN=2184;AC=100;THETA=0.0227;VT=SNP;AA=.;LDAF=0.0872;AVGPOST=0.9040;SNPSOURCE=LOWCOV;RSQ=0.4650;AF=0.05;ASN_AF=0.03;AMR_AF=0.04;AFR_AF=0.07;EUR_AF=0.04
+1	94991	rs188832636	G	A	100	PASS	THETA=0.0048;LDAF=0.0041;AN=2184;AVGPOST=0.9944;ERATE=0.0009;VT=SNP;AA=.;SNPSOURCE=LOWCOV;RSQ=0.4157;AC=3;AF=0.0014;AMR_AF=0.0028;EUR_AF=0.0026
+1	98583	rs141344361	T	A	100	PASS	AVGPOST=0.9463;AC=248;THETA=0.0099;AN=2184;VT=SNP;AA=.;RSQ=0.8090;LDAF=0.1336;ERATE=0.0008;SNPSOURCE=LOWCOV;AF=0.11;ASN_AF=0.30;AMR_AF=0.16;AFR_AF=0.01;EUR_AF=0.02
+1	98929	rs12184306	A	G	100	PASS	RSQ=0.6226;AVGPOST=0.8784;AN=2184;VT=SNP;AA=.;ERATE=0.0045;LDAF=0.1723;SNPSOURCE=LOWCOV;AC=264;THETA=0.0070;AF=0.12;ASN_AF=0.16;AMR_AF=0.08;AFR_AF=0.16;EUR_AF=0.09
+1	98946	rs191775802	C	G	100	PASS	AVGPOST=0.9945;ERATE=0.0013;AN=2184;LDAF=0.0046;VT=SNP;AA=.;RSQ=0.4807;SNPSOURCE=LOWCOV;AC=4;THETA=0.0097;AF=0.0018;AFR_AF=0.01
+1	98974	rs12184307	A	G	100	PASS	AVGPOST=0.8921;AN=2184;AC=224;THETA=0.0130;VT=SNP;AA=.;LDAF=0.1405;RSQ=0.6149;SNPSOURCE=LOWCOV;ERATE=0.0012;AF=0.10;ASN_AF=0.14;AMR_AF=0.07;AFR_AF=0.14;EUR_AF=0.06
+1	99671	rs146209971	A	T	100	PASS	THETA=0.0199;AN=2184;AC=13;RSQ=0.4401;VT=SNP;AA=.;ERATE=0.0010;AVGPOST=0.9802;SNPSOURCE=LOWCOV;LDAF=0.0158;AF=0.01;AMR_AF=0.02;AFR_AF=0.0020;EUR_AF=0.01
+1	99687	rs139153227	C	T	100	PASS	THETA=0.0211;LDAF=0.0470;AN=2184;VT=SNP;AA=.;ERATE=0.0010;RSQ=0.6276;AVGPOST=0.9548;AC=64;SNPSOURCE=LOWCOV;AF=0.03;AMR_AF=0.03;AFR_AF=0.01;EUR_AF=0.07
+1	99719	rs183898652	C	T	100	PASS	AN=2184;RSQ=0.5856;VT=SNP;AA=.;AC=10;ERATE=0.0018;SNPSOURCE=LOWCOV;LDAF=0.0076;AVGPOST=0.9925;THETA=0.0251;AF=0.0046;AMR_AF=0.01;AFR_AF=0.01;EUR_AF=0.0013
+1	100676	rs188226172	A	T	100	PASS	THETA=0.0212;LDAF=0.0047;AN=2184;VT=SNP;AA=.;RSQ=0.2646;SNPSOURCE=LOWCOV;ERATE=0.0012;AC=2;AVGPOST=0.9925;AF=0.0009;AMR_AF=0.0028;AFR_AF=0.0020
+1	103905	rs142403309	A	G	100	PASS	AN=2184;THETA=0.0131;ERATE=0.0025;AVGPOST=0.8782;AC=220;VT=SNP;AA=.;LDAF=0.1434;SNPSOURCE=LOWCOV;RSQ=0.5994;AF=0.10;ASN_AF=0.10;AMR_AF=0.14;AFR_AF=0.15;EUR_AF=0.05
+1	106544	rs180741296	C	G	100	PASS	AC=205;AVGPOST=0.5776;AN=2184;VT=SNP;AA=.;LDAF=0.3120;SNPSOURCE=LOWCOV;ERATE=0.0061;THETA=0.0372;RSQ=0.1442;AF=0.09;ASN_AF=0.11;AMR_AF=0.13;AFR_AF=0.11;EUR_AF=0.05
+1	109107	rs201432136	G	GT	67	PASS	AA=G;AC=63;AF=0.03;AFR_AF=0.01;AMR_AF=0.04;AN=2184;ASN_AF=0.03;AVGPOST=0.8840;ERATE=0.0122;EUR_AF=0.04;LDAF=0.0890;RSQ=0.3660;THETA=0.0210;VT=INDEL
+1	111513	rs199911222	C	CTA	249	PASS	AA=.;AC=58;AF=0.03;AFR_AF=0.09;AMR_AF=0.03;AN=2184;ASN_AF=0.0017;AVGPOST=0.9145;ERATE=0.0024;EUR_AF=0.0013;LDAF=0.0665;RSQ=0.4694;THETA=0.0292;VT=INDEL
diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index 696cdfe..c0641b0 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -205,6 +205,17 @@ def test_issue_49(self):
             pass
 
 
+class Test1kgSites(unittest.TestCase):
+
+    def test_reader(self):
+        """The samples attribute should be the empty list."""
+        reader = vcf.Reader(fh('1kg.sites.vcf', 'r'))
+
+        self.assertEqual(reader.samples, [])
+        for record in reader:
+            self.assertEqual(record.samples, [])
+
+
 class TestGatkOutputWriter(unittest.TestCase):
 
     def testWrite(self):
@@ -841,6 +852,7 @@ def test_trim(self):
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestOpenMethods))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestFilter))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(Test1kg))
+suite.addTests(unittest.TestLoader().loadTestsFromTestCase(Test1kgSites))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestRecord))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestCall))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestRegression))

From 9d7f44f71b817a83378b459491ae3193c4b5a170 Mon Sep 17 00:00:00 2001
From: Martijn Vermaat <martijn@vermaat.name>
Date: Tue, 26 Feb 2013 17:34:41 +0100
Subject: [PATCH 042/168] Only write FORMAT if it is in the template

Also, don't write any additional tab characters at the end of the record.
---
 vcf/parser.py        | 13 ++++++++-----
 vcf/test/test_vcf.py | 22 ++++++++++++++++++++++
 2 files changed, 30 insertions(+), 5 deletions(-)

diff --git a/vcf/parser.py b/vcf/parser.py
index 10c6268..fc85ca8 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -220,6 +220,7 @@ def __init__(self, fsock=None, filename=None, compressed=False, prepend_chr=Fals
         self.samples = None
         self._sample_indexes = None
         self._header_lines = []
+        self._column_headers = []
         self._tabix = None
         self._prepend_chr = prepend_chr
         self._parse_metainfo()
@@ -274,7 +275,8 @@ def _parse_metainfo(self):
 
             line = self.reader.next()
 
-        fields = re.split('\t| +', line)
+        fields = re.split('\t| +', line[1:])
+        self._column_headers = fields[:9]
         self.samples = fields[9:]
         self._sample_indexes = dict([(x,i) for (i,x) in enumerate(self.samples)])
 
@@ -538,8 +540,6 @@ def fetch(self, chrom, start, end=None):
 class Writer(object):
     """ VCF Writer """
 
-    fixed_fields = "#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT".split()
-
     # Reverse keys and values in header field count dictionary
     counts = dict((v,k) for k,v in field_counts.iteritems())
 
@@ -574,13 +574,16 @@ def __init__(self, stream, template, lineterminator="\r\n"):
 
     def _write_header(self):
         # TODO: write INFO, etc
-        self.writer.writerow(self.fixed_fields + self.template.samples)
+        self.stream.write('#' + '\t'.join(self.template._column_headers
+                                          + self.template.samples) + '\n')
 
     def write_record(self, record):
         """ write a record to the file """
         ffs = self._map(str, [record.CHROM, record.POS, record.ID, record.REF]) \
               + [self._format_alt(record.ALT), record.QUAL or '.', self._format_filter(record.FILTER),
-                 self._format_info(record.INFO), record.FORMAT]
+                 self._format_info(record.INFO)]
+        if record.FORMAT:
+            ffs.append(record.FORMAT)
 
         samples = [self._format_sample(record.FORMAT, sample)
             for sample in record.samples]
diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index c0641b0..40de4df 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -192,6 +192,8 @@ class Test1kg(unittest.TestCase):
     def testParse(self):
         reader = vcf.Reader(fh('1kg.vcf.gz', 'rb'))
 
+        assert 'FORMAT' in reader._column_headers
+
         self.assertEqual(len(reader.samples), 629)
         for _ in reader:
             pass
@@ -211,10 +213,30 @@ def test_reader(self):
         """The samples attribute should be the empty list."""
         reader = vcf.Reader(fh('1kg.sites.vcf', 'r'))
 
+        assert 'FORMAT' not in reader._column_headers
+
         self.assertEqual(reader.samples, [])
         for record in reader:
             self.assertEqual(record.samples, [])
 
+    def test_writer(self):
+        """FORMAT should not be written if not present in the template and no
+        extra tab character should be printed if there are no FORMAT fields."""
+        reader = vcf.Reader(fh('1kg.sites.vcf', 'r'))
+        out = StringIO()
+        writer = vcf.Writer(out, reader, lineterminator='\n')
+
+        for record in reader:
+            writer.write_record(record)
+        out.seek(0)
+        out_str = out.getvalue()
+        for line in out_str.split('\n'):
+            if line.startswith('##'):
+                continue
+            if line.startswith('#CHROM'):
+                assert 'FORMAT' not in line
+            assert not line.endswith('\t')
+
 
 class TestGatkOutputWriter(unittest.TestCase):
 

From 4fb0c86b505548d11d382526c4f7b718828dcdc0 Mon Sep 17 00:00:00 2001
From: Martijn Vermaat <martijn@vermaat.name>
Date: Sat, 16 Mar 2013 11:19:37 +0100
Subject: [PATCH 043/168] Adhere to `strict_whitespace` in parsing column
 headers

Fixes parsing of sample names with space characters in `strict_whitespace`
mode. Suggested by Lee Lichtenstein and Manaswi Gupta.
---
 vcf/parser.py        | 12 ++++++------
 vcf/test/test_vcf.py |  9 +++++++++
 2 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/vcf/parser.py b/vcf/parser.py
index fc85ca8..c1da964 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -205,6 +205,11 @@ def __init__(self, fsock=None, filename=None, compressed=False, prepend_chr=Fals
             if sys.version > '3':
                 self._reader = codecs.getreader('ascii')(self._reader)
 
+        if strict_whitespace:
+            self._separator = '\t'
+        else:
+            self._separator = '\t| +'
+
         self.reader = (line.strip() for line in self._reader if line.strip())
 
         #: metadata fields from header (string or hash, depending)
@@ -226,11 +231,6 @@ def __init__(self, fsock=None, filename=None, compressed=False, prepend_chr=Fals
         self._parse_metainfo()
         self._format_cache = {}
 
-        if strict_whitespace:
-            self._separator = '\t'
-        else:
-            self._separator = '\t| +'
-
     def __iter__(self):
         return self
 
@@ -275,7 +275,7 @@ def _parse_metainfo(self):
 
             line = self.reader.next()
 
-        fields = re.split('\t| +', line[1:])
+        fields = re.split(self._separator, line[1:])
         self._column_headers = fields[:9]
         self.samples = fields[9:]
         self._sample_indexes = dict([(x,i) for (i,x) in enumerate(self.samples)])
diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index 40de4df..71df788 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -321,6 +321,14 @@ def testWrite(self):
                 assert line.startswith('##SAMPLE=<'), "Found dictionary in meta line: {0}".format(line)
 
 
+class TestSamplesSpace(unittest.TestCase):
+    filename = 'samples-space.vcf'
+    samples = ['NA 00001', 'NA 00002', 'NA 00003']
+    def test_samples(self):
+        self.reader = vcf.Reader(fh(self.filename), strict_whitespace=True)
+        self.assertEqual(self.reader.samples, self.samples)
+
+
 class TestRecord(unittest.TestCase):
 
     def test_num_calls(self):
@@ -875,6 +883,7 @@ def test_trim(self):
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestFilter))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(Test1kg))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(Test1kgSites))
+suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestSamplesSpace))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestRecord))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestCall))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestRegression))

From 0fd74aac844414852f3606af2105b8e146b03f28 Mon Sep 17 00:00:00 2001
From: Martijn Vermaat <martijn@vermaat.name>
Date: Sat, 16 Mar 2013 11:24:20 +0100
Subject: [PATCH 044/168] Forgot to add test file

---
 vcf/test/samples-space.vcf | 10 ++++++++++
 1 file changed, 10 insertions(+)
 create mode 100644 vcf/test/samples-space.vcf

diff --git a/vcf/test/samples-space.vcf b/vcf/test/samples-space.vcf
new file mode 100644
index 0000000..8c9bb9e
--- /dev/null
+++ b/vcf/test/samples-space.vcf
@@ -0,0 +1,10 @@
+##fileformat=VCFv4.0
+##FILTER=<ID=q10,Description="Quality below 10">
+##FILTER=<ID=s50,Description="Less than 50% of samples have data">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NA 00001	NA 00002	NA 00003
+20	14370	rs6054257	G	A	29	PASS	.	GT:GQ:DP:HQ	0|0:48:1:51,51	1|0:48:8:51,51	./.
+20	76766	rs6054257	C	T	29	PASS	.	GT:GQ:DP:HQ	0|0:48:1:51,51	1|0:48:8:51,51	./.

From 46f83b1e936e23f7b3bad3a78635b580d0fb787a Mon Sep 17 00:00:00 2001
From: Nils Homer <Nils.Homer@childrens.harvard.edu>
Date: Thu, 6 Jun 2013 14:30:09 -0400
Subject: [PATCH 045/168] * adding support for contigs in the VCF header.

---
 vcf/parser.py | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/vcf/parser.py b/vcf/parser.py
index c1da964..7972223 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -67,6 +67,7 @@
 _Alt = collections.namedtuple('Alt', ['id', 'desc'])
 _Format = collections.namedtuple('Format', ['id', 'num', 'type', 'desc'])
 _SampleInfo = collections.namedtuple('SampleInfo', ['samples', 'gt_bases', 'gt_types', 'gt_phases'])
+_Contig = collections.namedtuple('Contig', ['id', 'length'])
 
 
 class _vcf_metadata_parser(object):
@@ -93,6 +94,10 @@ def __init__(self):
             Type=(?P<type>.+),
             Description="(?P<desc>.*)"
             >''', re.VERBOSE)
+        self.contig_pattern = re.compile(r'''\#\#contig=<
+            ID=(?P<id>[^,]+),
+            length=(?P<length>-?\d+)
+            >''', re.VERBOSE)
         self.meta_pattern = re.compile(r'''##(?P<key>.+?)=(?P<val>.+)''')
 
     def vcf_field_count(self, num_str):
@@ -152,6 +157,20 @@ def read_format(self, format_string):
                        match.group('type'), match.group('desc'))
 
         return (match.group('id'), form)
+    
+    def read_contig(self, contig_string):
+        '''Read a meta-contigrmation INFO line.'''
+        match = self.contig_pattern.match(contig_string)
+        if not match:
+            raise SyntaxError(
+                "One of the contig lines is malformed: %s" % contig_string)
+
+        length = self.vcf_field_count(match.group('length'))
+
+        contig = _Contig(match.group('id'), length)
+
+        return (match.group('id'), contig)
+
 
     def read_meta_hash(self, meta_string):
         items = re.split("[<>]", meta_string)
@@ -222,6 +241,8 @@ def __init__(self, fsock=None, filename=None, compressed=False, prepend_chr=Fals
         self.alts = None
         #: FORMAT fields from header
         self.formats = None
+        #: contig fields from header
+        self.contigs = None
         self.samples = None
         self._sample_indexes = None
         self._header_lines = []
@@ -239,7 +260,7 @@ def _parse_metainfo(self):
 
         The end user shouldn't have to use this.  She can access the metainfo
         directly with ``self.metadata``.'''
-        for attr in ('metadata', 'infos', 'filters', 'alts', 'formats'):
+        for attr in ('metadata', 'infos', 'filters', 'alts', 'contigs', 'formats'):
             setattr(self, attr, OrderedDict())
 
         parser = _vcf_metadata_parser()
@@ -263,6 +284,10 @@ def _parse_metainfo(self):
             elif line.startswith('##FORMAT'):
                 key, val = parser.read_format(line)
                 self.formats[key] = val
+            
+            elif line.startswith('##contig'):
+                key, val = parser.read_contig(line)
+                self.contigs[key] = val
 
             else:
                 key, val = parser.read_meta(line)

From 33f0711b16031fca9df07599be7639c1e46d021c Mon Sep 17 00:00:00 2001
From: Nils Homer <Nils.Homer@childrens.harvard.edu>
Date: Thu, 6 Jun 2013 16:56:53 -0400
Subject: [PATCH 046/168] * ignore the rest of the contig information

---
 vcf/parser.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/vcf/parser.py b/vcf/parser.py
index 7972223..fe7ee8f 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -97,6 +97,7 @@ def __init__(self):
         self.contig_pattern = re.compile(r'''\#\#contig=<
             ID=(?P<id>[^,]+),
             length=(?P<length>-?\d+)
+            .*
             >''', re.VERBOSE)
         self.meta_pattern = re.compile(r'''##(?P<key>.+?)=(?P<val>.+)''')
 

From c276e7b296ad89c4a79bc1bff6e44ca558d4df0d Mon Sep 17 00:00:00 2001
From: Alistair Miles <alimanfoo@googlemail.com>
Date: Thu, 11 Jul 2013 15:26:14 +0100
Subject: [PATCH 047/168] tests and fix for gatk header issue

---
 vcf/parser.py             | 2 +-
 vcf/test/gatk_26_meta.vcf | 3 +++
 vcf/test/test_vcf.py      | 9 +++++++++
 3 files changed, 13 insertions(+), 1 deletion(-)
 create mode 100644 vcf/test/gatk_26_meta.vcf

diff --git a/vcf/parser.py b/vcf/parser.py
index fe7ee8f..b4efa86 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -178,7 +178,7 @@ def read_meta_hash(self, meta_string):
         # Removing initial hash marks and final equal sign
         key = items[0][2:-1]
         hashItems = items[1].split(',')
-        val = OrderedDict(item.split("=") for item in hashItems)
+        val = OrderedDict(item.split("=", 1) for item in hashItems)
         return key, val
 
     def read_meta(self, meta_string):
diff --git a/vcf/test/gatk_26_meta.vcf b/vcf/test/gatk_26_meta.vcf
new file mode 100644
index 0000000..2f7ec78
--- /dev/null
+++ b/vcf/test/gatk_26_meta.vcf
@@ -0,0 +1,3 @@
+##fileformat=VCFv4.1
+##GATKCommandLine=<ID=LeftAlignAndTrimVariants,Version=2.6-4-g3e5ff60,Date="Thu Jul 11 13:48:05 BST 2013",Epoch=1373546885069,CommandLineOptions="analysis_type=LeftAlignAndTrimVariants">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT
diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index 71df788..5650908 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -870,6 +870,15 @@ def test_trim(self):
 
 
 
+class TestGATKMeta(unittest.TestCase):
+
+    def test_meta(self):
+        # expect no exceptions raised
+        reader = vcf.Reader(fh('gatk_26_meta.vcf'))
+        assert 'GATKCommandLine' in reader.metadata
+        assert reader.metadata['GATKCommandLine'][0]['CommandLineOptions'] == '"analysis_type=LeftAlignAndTrimVariants"'
+
+
 
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestGatkOutput))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestFreebayesOutput))

From b19f2bdc6b7cd4353e06fb6d8fc5f2644ce72800 Mon Sep 17 00:00:00 2001
From: Alistair Miles <alimanfoo@googlemail.com>
Date: Thu, 11 Jul 2013 16:55:49 +0100
Subject: [PATCH 048/168] added test and fix for commas inside quoted value

---
 vcf/parser.py             | 32 ++++++++++++++++++++++++++++++--
 vcf/test/gatk_26_meta.vcf |  1 +
 vcf/test/test_vcf.py      |  1 +
 3 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/vcf/parser.py b/vcf/parser.py
index b4efa86..2848576 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -177,8 +177,36 @@ def read_meta_hash(self, meta_string):
         items = re.split("[<>]", meta_string)
         # Removing initial hash marks and final equal sign
         key = items[0][2:-1]
-        hashItems = items[1].split(',')
-        val = OrderedDict(item.split("=", 1) for item in hashItems)
+        # N.B., items can have quoted values, so cannot just split on comma
+        val = OrderedDict()
+        state = 0
+        k = ''
+        v = ''
+        for c in items[1]:
+            if state == 0:  # reading item key
+                if c == '=':
+                    state = 1  # end of key, start reading value
+                else:
+                    k += c  # extend key
+            elif state == 1:  # reading item value
+                if v == '' and c == '"':
+                    v += c  # include quote mark in value
+                    state = 2  # start reading quoted value
+                elif c == ',':
+                    val[k] = v  # store parsed item
+                    state = 0  # read next key
+                    k = ''
+                    v = ''
+                else:
+                    v += c
+            elif state == 2:  # reading quoted item value
+                if c == '"':
+                    v += c  # include quote mark in value
+                    state = 1  # end quoting
+                else:
+                    v += c
+        if k != '':
+            val[k] = v
         return key, val
 
     def read_meta(self, meta_string):
diff --git a/vcf/test/gatk_26_meta.vcf b/vcf/test/gatk_26_meta.vcf
index 2f7ec78..1dd2e56 100644
--- a/vcf/test/gatk_26_meta.vcf
+++ b/vcf/test/gatk_26_meta.vcf
@@ -1,3 +1,4 @@
 ##fileformat=VCFv4.1
 ##GATKCommandLine=<ID=LeftAlignAndTrimVariants,Version=2.6-4-g3e5ff60,Date="Thu Jul 11 13:48:05 BST 2013",Epoch=1373546885069,CommandLineOptions="analysis_type=LeftAlignAndTrimVariants">
+##GATKCommandLine=<ID=VariantAnnotator,Version=2.6-4-g3e5ff60,Date="Thu Jul 11 13:48:41 BST 2013",Epoch=1373546921584,CommandLineOptions="analysis_type=VariantAnnotator annotation=[HomopolymerRun, VariantType, TandemRepeatAnnotator]">
 #CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT
diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index 5650908..658669e 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -877,6 +877,7 @@ def test_meta(self):
         reader = vcf.Reader(fh('gatk_26_meta.vcf'))
         assert 'GATKCommandLine' in reader.metadata
         assert reader.metadata['GATKCommandLine'][0]['CommandLineOptions'] == '"analysis_type=LeftAlignAndTrimVariants"'
+        assert reader.metadata['GATKCommandLine'][1]['CommandLineOptions'] == '"analysis_type=VariantAnnotator annotation=[HomopolymerRun, VariantType, TandemRepeatAnnotator]"'
 
 
 

From 51fac4ba22f89f87a4c8c361570f3b4465995e8a Mon Sep 17 00:00:00 2001
From: Alistair Miles <alimanfoo@googlemail.com>
Date: Thu, 11 Jul 2013 17:06:18 +0100
Subject: [PATCH 049/168] whitespace?

---
 vcf/parser.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/vcf/parser.py b/vcf/parser.py
index 2848576..d1fe275 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -183,6 +183,7 @@ def read_meta_hash(self, meta_string):
         k = ''
         v = ''
         for c in items[1]:
+
             if state == 0:  # reading item key
                 if c == '=':
                     state = 1  # end of key, start reading value

From 2c9166529424ffaa29c7fa8883d6670040e84da5 Mon Sep 17 00:00:00 2001
From: Martijn Vermaat <martijn@vermaat.name>
Date: Fri, 12 Jul 2013 12:36:08 +0200
Subject: [PATCH 050/168] Fix contig test case for new contig header parsing

---
 vcf/test/test_vcf.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index 71df788..56f3e29 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -48,10 +48,7 @@ def test_vcf_4_1(self):
         self.assertEqual(reader.metadata['fileformat'],  'VCFv4.1')
 
         # contigs were added in vcf4.1
-        # probably need to add a reader.contigs attribute
-        assert 'contig' in reader.metadata
-        assert 'ID' in reader.metadata['contig'][0]
-        assert reader.metadata['contig'][0]['ID'] == '20'
+        self.assertEqual(reader.contigs['20'].length, 62435964)
 
         # test we can walk the file at least
         for r in reader:

From d2f96d8408576a69cd5837846a5e63f194217128 Mon Sep 17 00:00:00 2001
From: Sergei Lebedev <superbobry@gmail.com>
Date: Fri, 12 Jul 2013 15:04:09 +0400
Subject: [PATCH 051/168] Added pickling support for '_Record' and '_CallData'
 -- closes #108

---
 vcf/model.py         | 11 +++++++++++
 vcf/test/test_vcf.py |  7 +++++++
 2 files changed, 18 insertions(+)

diff --git a/vcf/model.py b/vcf/model.py
index 7d28506..17672f7 100644
--- a/vcf/model.py
+++ b/vcf/model.py
@@ -35,6 +35,13 @@ def __eq__(self, other):
                 and self.sample == other.sample
                 and self.gt_type == other.gt_type)
 
+    def __getstate__(self):
+        return dict((attr, getattr(self, attr)) for attr in self.__slots__)
+
+    def __setstate__(self, state):
+        for attr in self.__slots__:
+            setattr(self, attr, state.get(attr))
+
     def gt_phase_char(self):
         return "/" if not self.phased else "|"
 
@@ -540,4 +547,8 @@ def __str__(self):
                 for (x, y) in zip(self._fields, self)])
             return "CallData(" + dat + ')'
 
+        def __reduce__(self):
+            args = super(CallData, self).__reduce__()
+            return make_calldata_tuple, (fields, )
+
     return CallData
diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index 71df788..0930847 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -3,6 +3,7 @@
 import doctest
 import os
 import commands
+import cPickle
 from StringIO import StringIO
 
 import vcf
@@ -638,6 +639,11 @@ def test_info_multiple_values(self):
         actual = var.INFO['RepeatConsensus']
         self.assertEqual(expected, actual)
 
+    def test_pickle(self):
+        reader = vcf.Reader(fh('example-4.0.vcf'))
+        for var in reader:
+            assert cPickle.loads(cPickle.dumps(var)) == var
+
 
 class TestCall(unittest.TestCase):
 
@@ -688,6 +694,7 @@ def test_gt_types(self):
             elif var.POS == 1234567:
                 self.assertEqual([None,1,2], gt_types)
 
+
 class TestTabix(unittest.TestCase):
 
     def setUp(self):

From 76afe7766623c0981b2d6a0c8cf4e2634c37ada3 Mon Sep 17 00:00:00 2001
From: James Casbon <casbon@gmail.com>
Date: Mon, 15 Jul 2013 08:17:45 +0100
Subject: [PATCH 052/168] add python 3.3 testing, HISTORY updates

---
 .travis.yml      |  1 +
 docs/HISTORY.rst | 11 +++++++++++
 tox.ini          |  5 ++++-
 3 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 47b1002..a1cb3a4 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -4,6 +4,7 @@ python:
   - "2.6"
   - "2.7"
   - "3.2"
+  - "3.3"
   - "pypy"
 install:
   - "if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install --use-mirrors cython && pip install --use-mirrors pysam argparse ordereddict; fi"
diff --git a/docs/HISTORY.rst b/docs/HISTORY.rst
index fc3f2b3..15aba83 100644
--- a/docs/HISTORY.rst
+++ b/docs/HISTORY.rst
@@ -17,6 +17,17 @@ New features should have test code sent with them.
 Changes
 =======
 
+0.6.4 Release
+-------------
+
+* Handle INFO fields with multiple values, thanks
+* Support writing records without GT data #88, thanks @bow
+* Pickleable call data #112, thanks @superbobry
+* Write files without FORMAT #95 thanks Martijn
+* Strict whitespace mode, thanks Martijn, Lee Lichtenstein and Manawsi Gupta
+* Add support for contigs in header, thanks @gcnh and Martijn
+* Fix GATK header parsing, thanks @alimanfoo
+
 0.6.3 Release
 -------------
 
diff --git a/tox.ini b/tox.ini
index 771e15f..52e1085 100644
--- a/tox.ini
+++ b/tox.ini
@@ -4,7 +4,7 @@
 # and then run "tox" from this directory.
 
 [tox]
-envlist = py26, py27, py32
+envlist = py26, py27, py32, py33
 
 [testenv]
 commands =
@@ -27,3 +27,6 @@ deps =
 deps = 
     cython
 
+[testenv:py32]
+deps = 
+    cython

From 2dd86220a746be3e940cbcfe12e1a3a53e05317d Mon Sep 17 00:00:00 2001
From: James Casbon <casbon@gmail.com>
Date: Mon, 15 Jul 2013 17:57:41 +0100
Subject: [PATCH 053/168] version 0.6.4

---
 vcf/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vcf/__init__.py b/vcf/__init__.py
index 7ab38ee..3e49b09 100644
--- a/vcf/__init__.py
+++ b/vcf/__init__.py
@@ -177,4 +177,4 @@
 from vcf.filters import Base as Filter
 from vcf.parser import RESERVED_INFO, RESERVED_FORMAT
 
-VERSION = '0.6.3'
+VERSION = '0.6.4'

From 67b21a1350bfe8280a28f1886a9895962d41cdd4 Mon Sep 17 00:00:00 2001
From: Martijn Vermaat <martijn@vermaat.name>
Date: Wed, 7 Aug 2013 16:47:53 +0200
Subject: [PATCH 054/168] Differentiate between no filtering and PASS

Fixes jamescasbon#114
---
 scripts/vcf_filter.py        |  2 +-
 vcf/model.py                 |  5 ++++-
 vcf/parser.py                |  4 +++-
 vcf/test/mixed-filtering.vcf | 24 ++++++++++++++++++++++++
 vcf/test/test_vcf.py         | 15 +++++++++++++++
 5 files changed, 47 insertions(+), 3 deletions(-)
 create mode 100644 vcf/test/mixed-filtering.vcf

diff --git a/scripts/vcf_filter.py b/scripts/vcf_filter.py
index 9a08629..fd32b39 100644
--- a/scripts/vcf_filter.py
+++ b/scripts/vcf_filter.py
@@ -162,7 +162,7 @@ def addfilt(filt):
         if output_record:
             # use PASS only if other filter names appear in the FILTER column
             #FIXME: is this good idea?
-            if record.FILTER == '.' and not drop_filtered: record.FILTER = 'PASS'
+            if record.FILTER is None and not drop_filtered: record.FILTER = 'PASS'
             output.write_record(record)
 
 if __name__ == '__main__': main()
diff --git a/vcf/model.py b/vcf/model.py
index 17672f7..a975a82 100644
--- a/vcf/model.py
+++ b/vcf/model.py
@@ -163,7 +163,10 @@ def add_format(self, fmt):
         self.FORMAT = self.FORMAT + ':' + fmt
 
     def add_filter(self, flt):
-        self.FILTER.append(flt)
+        if self.FILTER is None:
+            self.FILTER = [flt]
+        else:
+            self.FILTER.append(flt)
 
     def add_info(self, info, value=True):
         self.INFO[info] = value
diff --git a/vcf/parser.py b/vcf/parser.py
index d1fe275..2ad2368 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -541,7 +541,9 @@ def next(self):
                 qual = None
 
         filt = row[6]
-        if filt == 'PASS' or filt == '.':
+        if filt == '.':
+            filt = None
+        elif filt == 'PASS':
             filt = []
         else:
             filt = filt.split(';')
diff --git a/vcf/test/mixed-filtering.vcf b/vcf/test/mixed-filtering.vcf
new file mode 100644
index 0000000..f02e839
--- /dev/null
+++ b/vcf/test/mixed-filtering.vcf
@@ -0,0 +1,24 @@
+##fileformat=VCFv4.1
+##fileDate=20090805
+##source=myImputationProgramV3.1
+##reference=file:///seq/references/1000GenomesPilot-NCBI36.fasta
+##contig=<ID=20,length=62435964,assembly=B36,md5=f126cdf8a6e0c7f379d618ff66beb2da,species="Homo sapiens",taxonomy=x>
+##phasing=partial
+##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency">
+##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele">
+##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP membership, build 129">
+##INFO=<ID=H2,Number=0,Type=Flag,Description="HapMap2 membership">
+##FILTER=<ID=q10,Description="Quality below 10">
+##FILTER=<ID=s50,Description="Less than 50% of samples have data">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NA00001	NA00002	NA00003
+20	14370	rs6054257	G	A	29	PASS	NS=3;DP=14;AF=0.5;DB;H2	GT:GQ:DP:HQ	0|0:48:1:51,51	1|0:48:8:51,51	1/1:43:5:.,.
+20	17330	.	T	A	3	q10	NS=3;DP=11;AF=0.017	GT:GQ:DP:HQ	0|0:49:3:58,50	0|1:3:5:65,3	0/0:41:3
+20	1110696	rs6040355	A	G,T	67	PASS	NS=2;DP=10;AF=0.333,0.667;AA=T;DB	GT:GQ:DP:HQ	1|2:21:6:23,27	2|1:2:0:18,2	2/2:35:4
+20	1230237	.	T	.	47	.	NS=3;DP=13;AA=T	GT:GQ:DP:HQ	0|0:54:7:56,60	0|0:48:4:51,51	0/0:61:2
+20	1234567	microsat1	GTC	G,GTCT	50	q10;q50	NS=3;DP=9;AA=G	GT:GQ:DP	0/1:35:4	0/2:17:2	1/1:40:3
diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index b3fcf92..798b4db 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -327,6 +327,20 @@ def test_samples(self):
         self.assertEqual(self.reader.samples, self.samples)
 
 
+class TestMixedFiltering(unittest.TestCase):
+    filename = 'mixed-filtering.vcf'
+    def test_mixed_filtering(self):
+        """
+        Test mix of FILTER values (pass, filtered, no filtering).
+        """
+        reader = vcf.Reader(fh(self.filename))
+        self.assertEqual(next(reader).FILTER, [])
+        self.assertEqual(next(reader).FILTER, ['q10'])
+        self.assertEqual(next(reader).FILTER, [])
+        self.assertEqual(next(reader).FILTER, None)
+        self.assertEqual(next(reader).FILTER, ['q10', 'q50'])
+
+
 class TestRecord(unittest.TestCase):
 
     def test_num_calls(self):
@@ -898,6 +912,7 @@ def test_meta(self):
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(Test1kg))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(Test1kgSites))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestSamplesSpace))
+suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestMixedFiltering))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestRecord))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestCall))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestRegression))

From cc70525e66cc4041e822478a48566939e6160945 Mon Sep 17 00:00:00 2001
From: Martijn Vermaat <martijn@vermaat.name>
Date: Wed, 14 Aug 2013 09:52:32 +0200
Subject: [PATCH 055/168] Allow fields in contig definition before length

This is a bit of a hack and should really be generalized to proper
parsing of all header lines.
---
 vcf/parser.py                        |   1 +
 vcf/test/gonl.chr20.release4.gtc.vcf | 120 +++++++++++++++++++++++++++
 vcf/test/test_vcf.py                 |  13 +++
 3 files changed, 134 insertions(+)
 create mode 100644 vcf/test/gonl.chr20.release4.gtc.vcf

diff --git a/vcf/parser.py b/vcf/parser.py
index d1fe275..a6eb99f 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -96,6 +96,7 @@ def __init__(self):
             >''', re.VERBOSE)
         self.contig_pattern = re.compile(r'''\#\#contig=<
             ID=(?P<id>[^,]+),
+            .*
             length=(?P<length>-?\d+)
             .*
             >''', re.VERBOSE)
diff --git a/vcf/test/gonl.chr20.release4.gtc.vcf b/vcf/test/gonl.chr20.release4.gtc.vcf
new file mode 100644
index 0000000..03588bf
--- /dev/null
+++ b/vcf/test/gonl.chr20.release4.gtc.vcf
@@ -0,0 +1,120 @@
+##fileformat=VCFv4.1
+##ApplyRecalibration="analysis_type=ApplyRecalibration input_file=[] read_buffer_size=null phone_home=STANDARD gatk_key=null read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL reference_sequence=/target/gpfs2/gcc/resources/hg19/indices/human_g1k_v37.fa rodBind=[] nonDeterministicRandomSeed=false downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false BQSR=null defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=1 num_cpu_threads=null num_io_threads=null num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false logging_level=INFO log_to_file=null help=false input=[(RodBinding name=input source=/target/gpfs2/gcc/home/lfrancioli/gonl/projects/trio-analysis/results/snps/UG_raw_biallelic/gonl.biallelic.vcf)] recal_file=/target/gpfs2/gcc/home/lfrancioli/gonl/projects/trio-analysis/intermediate/snps/vqsr_1kg_phase1/gonl.biallelic.vcf.1kg_phase1.2.recal tranches_file=/target/gpfs2/gcc/home/lfrancioli/gonl/projects/trio-analysis/intermediate/snps/vqsr_1kg_phase1/gonl.biallelic.vcf.1kg_phase1.2.tranches out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub ts_filter_level=99.5 ignore_filter=null mode=SNP filter_mismatching_base_and_quals=false"
+##CombineVariants="analysis_type=CombineVariants input_file=[] sample_metadata=[] read_buffer_size=null phone_home=STANDARD read_filter=[] intervals=[1:123000001-126000000] excludeIntervals=null reference_sequence=/humgen/1kg/reference/human_g1k_v37.fasta rodBind=[/humgen/1kg/processing/production_wgs_phase1/consensus_wgs/v2b/calls/chr1/AFR/AFR.phase1.chr1.42.raw.snps.vcf, /humgen/1kg/processing/production_wgs_phase1/consensus_wgs/v2b/calls/chr1/ASN/ASN.phase1.chr1.42.raw.snps.vcf, /humgen/1kg/processing/production_wgs_phase1/consensus_wgs/v2b/calls/chr1/AMR/AMR.phase1.chr1.42.raw.snps.vcf, /humgen/1kg/processing/production_wgs_phase1/consensus_wgs/v2b/calls/chr1/EUR/EUR.phase1.chr1.42.raw.snps.vcf, /humgen/1kg/processing/production_wgs_phase1/consensus_wgs/v2b/calls/chr1/AFR.admix/AFR.admix.phase1.chr1.42.raw.snps.vcf, /humgen/1kg/processing/production_wgs_phase1/consensus_wgs/v2b/calls/chr1/ASN.admix/ASN.admix.phase1.chr1.42.raw.snps.vcf, /humgen/1kg/processing/production_wgs_phase1/consensus_wgs/v2b/calls/chr1/AMR.admix/AMR.admix.phase1.chr1.42.raw.snps.vcf, /humgen/1kg/processing/production_wgs_phase1/consensus_wgs/v2b/calls/chr1/EUR.admix/EUR.admix.phase1.chr1.42.raw.snps.vcf, /humgen/1kg/processing/production_wgs_phase1/consensus_wgs/v2b/calls/chr1/ALL/ALL.phase1.chr1.42.raw.snps.vcf] rodToIntervalTrackName=null BTI_merge_rule=UNION nonDeterministicRandomSeed=false DBSNP=null downsampling_type=null downsample_to_fraction=null downsample_to_coverage=null baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=1 interval_merging=ALL read_group_black_list=null processingTracker=null restartProcessingTracker=false processingTrackerStatusFile=null processingTrackerID=-1 allow_intervals_with_unindexed_bam=false disable_experimental_low_memory_sharding=false logging_level=INFO log_to_file=null help=false out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub 
NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub genotypemergeoption=PRIORITIZE filteredrecordsmergetype=KEEP_IF_ANY_UNFILTERED rod_priority_list=ALL,AFR.admix,AMR.admix,EUR.admix,ASN.admix,AFR,AMR,EUR,ASN printComplexMerges=false filteredAreUncalled=false minimalVCF=false setKey=pop assumeIdenticalSamples=false minimumN=1 masterMerge=false mergeInfoWithMaxAC=true"
+##FILTER=<ID=TruthSensitivityTranche99.50to99.60,Description="Truth sensitivity tranche level at VSQ Lod: 0.0349 <= x < 0.6732">
+##FILTER=<ID=TruthSensitivityTranche99.60to99.70,Description="Truth sensitivity tranche level at VSQ Lod: -1.1344 <= x < 0.0349">
+##FILTER=<ID=TruthSensitivityTranche99.70to99.80,Description="Truth sensitivity tranche level at VSQ Lod: -3.7349 <= x < -1.1344">
+##FILTER=<ID=TruthSensitivityTranche99.80to99.90,Description="Truth sensitivity tranche level at VSQ Lod: -13.9352 <= x < -3.7349">
+##FILTER=<ID=TruthSensitivityTranche99.85to99.87,Description="Truth sensitivity tranche level at VSQ Lod: -0.0128 <= x < 0.5027">
+##FILTER=<ID=TruthSensitivityTranche99.87to99.88,Description="Truth sensitivity tranche level at VSQ Lod: -0.2657 <= x < -0.0128">
+##FILTER=<ID=TruthSensitivityTranche99.88to99.89,Description="Truth sensitivity tranche level at VSQ Lod: -0.5671 <= x < -0.2657">
+##FILTER=<ID=TruthSensitivityTranche99.89to99.90,Description="Truth sensitivity tranche level at VSQ Lod: -0.9162 <= x < -0.5671">
+##FILTER=<ID=TruthSensitivityTranche99.90to100.00+,Description="Truth sensitivity tranche level at VQS Lod < -37539.4862">
+##FILTER=<ID=TruthSensitivityTranche99.90to100.00,Description="Truth sensitivity tranche level at VSQ Lod: -37539.4862 <= x < -13.9352">
+##FILTER=<ID=TruthSensitivityTranche99.90to99.91,Description="Truth sensitivity tranche level at VSQ Lod: -1.2368 <= x < -0.9162">
+##FILTER=<ID=TruthSensitivityTranche99.91to99.92,Description="Truth sensitivity tranche level at VSQ Lod: -1.6803 <= x < -1.2368">
+##FILTER=<ID=TruthSensitivityTranche99.92to99.93,Description="Truth sensitivity tranche level at VSQ Lod: -2.1816 <= x < -1.6803">
+##FILTER=<ID=TruthSensitivityTranche99.93to99.94,Description="Truth sensitivity tranche level at VSQ Lod: -2.8718 <= x < -2.1816">
+##FILTER=<ID=TruthSensitivityTranche99.94to100.00+,Description="Truth sensitivity tranche level at VQS Lod < -Infinity">
+##FILTER=<ID=TruthSensitivityTranche99.94to100.00,Description="Truth sensitivity tranche level at VSQ Lod: -Infinity <= x < -2.8718">
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##SelectVariants="analysis_type=SelectVariants input_file=[] read_buffer_size=null phone_home=STANDARD read_filter=[] intervals=[1:1-5000001] excludeIntervals=null interval_set_rule=UNION interval_merging=ALL reference_sequence=/target/gpfs2/gcc/home/lfrancioli/gonl/resources/hg19/indices/human_g1k_v37.fa rodBind=[] nonDeterministicRandomSeed=false downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=1 num_cpu_threads=null num_io_threads=null num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false logging_level=INFO log_to_file=null help=false variant=(RodBinding name=variant source=/target/gpfs2/gcc/home/lfrancioli/results/trio-analysis/ug_initial/gonl.1_1-5000001.vcf) discordance=(RodBinding name= source=UNBOUND) concordance=(RodBinding name= source=UNBOUND) out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sample_name=[] sample_expressions=null sample_file=null exclude_sample_name=[] exclude_sample_file=[] select_expressions=[] excludeNonVariants=false excludeFiltered=false restrictAllelesTo=BIALLELIC keepOriginalAC=false mendelianViolation=false mendelianViolationQualThreshold=0.0 select_random_number=0 select_random_fraction=0.0 remove_fraction_genotypes=0.0 selectTypeToInclude=[] keepIDs=null outMVFile=null filter_mismatching_base_and_quals=false"
+##SetFilterPASS="analysis_type=SetFilterPASS input_file=[] read_buffer_size=null phone_home=STANDARD gatk_key=null tag=NA read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/target/gpfs2/gcc/resources/hg19/indices/human_g1k_v37.fa nonDeterministicRandomSeed=false disableRandomization=false downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 enable_experimental_downsampling=false baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 defaultBaseQualities=-1 validation_strictness=SILENT remove_program_records=false keep_program_records=false unsafe=null num_threads=1 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false logging_level=INFO log_to_file=null help=false variant=(RodBinding name=variant source=/target/gpfs2/gcc/home/lfrancioli/gonl/projects/trio-analysis/intermediate/snps/vqsr_1kg_phase1/gonl.biallelic.vqsr_1kg_phase1.2.99.5.vcf) sites=[(RodBinding name=sites source=/target/gpfs2/gcc/home/lfrancioli/resources/1000GP_hg19/EUR.wgs.project_consensus_vqsr2b.20101123.snps.low_coverage.sites.vcf)] out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub filter_mismatching_base_and_quals=false"
+##UnifiedGenotyper="analysis_type=UnifiedGenotyper input_file=[/humgen/1kg/phase1_cleaned_bams/bams/chr1/CHB.phase1.chr1.42.cleaned.bam, /humgen/1kg/phase1_cleaned_bams/bams/chr1/CHS.phase1.chr1.42.cleaned.bam, /humgen/1kg/phase1_cleaned_bams/bams/chr1/CLM.phase1.chr1.42.cleaned.bam, /humgen/1kg/phase1_cleaned_bams/bams/chr1/JPT.phase1.chr1.42.cleaned.bam, /humgen/1kg/phase1_cleaned_bams/bams/chr1/MXL.phase1.chr1.42.cleaned.bam, /humgen/1kg/phase1_cleaned_bams/bams/chr1/PUR.phase1.chr1.42.cleaned.bam] sample_metadata=[] read_buffer_size=null phone_home=STANDARD read_filter=[] intervals=[1:123000001-126000000] excludeIntervals=null reference_sequence=/humgen/1kg/reference/human_g1k_v37.fasta rodBind=[/humgen/1kg/processing/production_wgs_phase1/consensus/ALL.phase1.wgs.unionBC1.pass.sites.vcf, /humgen/gsa-hpprojects/GATK/data/dbsnp_132_b37.leftAligned.vcf] rodToIntervalTrackName=null BTI_merge_rule=UNION nonDeterministicRandomSeed=false DBSNP=null downsampling_type=null downsample_to_fraction=null downsample_to_coverage=50 baq=CALCULATE_AS_NECESSARY baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=1 interval_merging=ALL read_group_black_list=null processingTracker=null restartProcessingTracker=false processingTrackerStatusFile=null processingTrackerID=-1 allow_intervals_with_unindexed_bam=false disable_experimental_low_memory_sharding=false logging_level=INFO log_to_file=null help=false genotype_likelihoods_model=SNP p_nonref_model=EXACT heterozygosity=0.0010 pcr_error_rate=1.0E-4 genotyping_mode=GENOTYPE_GIVEN_ALLELES output_mode=EMIT_VARIANTS_ONLY standard_min_confidence_threshold_for_calling=4.0 standard_min_confidence_threshold_for_emitting=4.0 noSLOD=false assume_single_sample_reads=null abort_at_too_much_coverage=-1 min_base_quality_score=17 min_mapping_quality_score=20 max_deletion_fraction=0.05 min_indel_count_for_genotyping=5 indel_heterozygosity=1.25E-4 
indelGapContinuationPenalty=10.0 indelGapOpenPenalty=45.0 indelHaplotypeSize=80 doContextDependentGapPenalties=true getGapPenaltiesFromData=false indel_recal_file=indel.recal_data.csv indelDebug=false dovit=false GSA_PRODUCTION_ONLY=false exactCalculation=LINEAR_EXPERIMENTAL ignoreSNPAlleles=false output_all_callable_bases=false genotype=false out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub debug_file=null metrics_file=null annotation=[]"
+##VariantAnnotator="analysis_type=VariantAnnotator input_file=[] read_buffer_size=null phone_home=STANDARD read_filter=[] intervals=[1:1-5000001] excludeIntervals=null interval_set_rule=UNION interval_merging=ALL reference_sequence=/target/gpfs2/gcc/home/lfrancioli/gonl/resources/hg19/indices/human_g1k_v37.fa rodBind=[] nonDeterministicRandomSeed=false downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=1 num_cpu_threads=null num_io_threads=null num_bam_file_handles=null read_group_black_list=null pedigree=[/target/gpfs2/gcc/home/lfrancioli/gonl/resources/UnifiedGenotyper/GoNL.ped] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false logging_level=INFO log_to_file=null help=false variant=(RodBinding name=variant source=/target/gpfs2/gcc/home/lfrancioli/results/trio-analysis/snps/gonl.1_1-5000001.biallelic.vcf) snpEffFile=(RodBinding name= source=UNBOUND) dbsnp=(RodBinding name= source=UNBOUND) comp=[] resource=[] out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub annotation=[TransmissionDisequilibriumTest, InbreedingCoeff, AlleleDosage, ChromosomeCounts] excludeAnnotation=[] group=[] expression=[] useAllAnnotations=false list=false vcfContainsOnlyIndels=false MendelViolationGenotypeQualityThreshold=0.0 requireStrictAlleleMatch=false filter_mismatching_base_and_quals=false"
+##contig=<ID=1,assembly=b37,length=249250621>
+##contig=<ID=10,assembly=b37,length=135534747>
+##contig=<ID=11,assembly=b37,length=135006516>
+##contig=<ID=12,assembly=b37,length=133851895>
+##contig=<ID=13,assembly=b37,length=115169878>
+##contig=<ID=14,assembly=b37,length=107349540>
+##contig=<ID=15,assembly=b37,length=102531392>
+##contig=<ID=16,assembly=b37,length=90354753>
+##contig=<ID=17,assembly=b37,length=81195210>
+##contig=<ID=18,assembly=b37,length=78077248>
+##contig=<ID=19,assembly=b37,length=59128983>
+##contig=<ID=2,assembly=b37,length=243199373>
+##contig=<ID=20,assembly=b37,length=63025520>
+##contig=<ID=21,assembly=b37,length=48129895>
+##contig=<ID=22,assembly=b37,length=51304566>
+##contig=<ID=3,assembly=b37,length=198022430>
+##contig=<ID=4,assembly=b37,length=191154276>
+##contig=<ID=5,assembly=b37,length=180915260>
+##contig=<ID=6,assembly=b37,length=171115067>
+##contig=<ID=7,assembly=b37,length=159138663>
+##contig=<ID=8,assembly=b37,length=146364022>
+##contig=<ID=9,assembly=b37,length=141213431>
+##contig=<ID=GL000191.1,assembly=b37,length=106433>
+##contig=<ID=GL000192.1,assembly=b37,length=547496>
+##contig=<ID=GL000193.1,assembly=b37,length=189789>
+##contig=<ID=GL000194.1,assembly=b37,length=191469>
+##contig=<ID=GL000195.1,assembly=b37,length=182896>
+##contig=<ID=GL000196.1,assembly=b37,length=38914>
+##contig=<ID=GL000197.1,assembly=b37,length=37175>
+##contig=<ID=GL000198.1,assembly=b37,length=90085>
+##contig=<ID=GL000199.1,assembly=b37,length=169874>
+##contig=<ID=GL000200.1,assembly=b37,length=187035>
+##contig=<ID=GL000201.1,assembly=b37,length=36148>
+##contig=<ID=GL000202.1,assembly=b37,length=40103>
+##contig=<ID=GL000203.1,assembly=b37,length=37498>
+##contig=<ID=GL000204.1,assembly=b37,length=81310>
+##contig=<ID=GL000205.1,assembly=b37,length=174588>
+##contig=<ID=GL000206.1,assembly=b37,length=41001>
+##contig=<ID=GL000207.1,assembly=b37,length=4262>
+##contig=<ID=GL000208.1,assembly=b37,length=92689>
+##contig=<ID=GL000209.1,assembly=b37,length=159169>
+##contig=<ID=GL000210.1,assembly=b37,length=27682>
+##contig=<ID=GL000211.1,assembly=b37,length=166566>
+##contig=<ID=GL000212.1,assembly=b37,length=186858>
+##contig=<ID=GL000213.1,assembly=b37,length=164239>
+##contig=<ID=GL000214.1,assembly=b37,length=137718>
+##contig=<ID=GL000215.1,assembly=b37,length=172545>
+##contig=<ID=GL000216.1,assembly=b37,length=172294>
+##contig=<ID=GL000217.1,assembly=b37,length=172149>
+##contig=<ID=GL000218.1,assembly=b37,length=161147>
+##contig=<ID=GL000219.1,assembly=b37,length=179198>
+##contig=<ID=GL000220.1,assembly=b37,length=161802>
+##contig=<ID=GL000221.1,assembly=b37,length=155397>
+##contig=<ID=GL000222.1,assembly=b37,length=186861>
+##contig=<ID=GL000223.1,assembly=b37,length=180455>
+##contig=<ID=GL000224.1,assembly=b37,length=179693>
+##contig=<ID=GL000225.1,assembly=b37,length=211173>
+##contig=<ID=GL000226.1,assembly=b37,length=15008>
+##contig=<ID=GL000227.1,assembly=b37,length=128374>
+##contig=<ID=GL000228.1,assembly=b37,length=129120>
+##contig=<ID=GL000229.1,assembly=b37,length=19913>
+##contig=<ID=GL000230.1,assembly=b37,length=43691>
+##contig=<ID=GL000231.1,assembly=b37,length=27386>
+##contig=<ID=GL000232.1,assembly=b37,length=40652>
+##contig=<ID=GL000233.1,assembly=b37,length=45941>
+##contig=<ID=GL000234.1,assembly=b37,length=40531>
+##contig=<ID=GL000235.1,assembly=b37,length=34474>
+##contig=<ID=GL000236.1,assembly=b37,length=41934>
+##contig=<ID=GL000237.1,assembly=b37,length=45867>
+##contig=<ID=GL000238.1,assembly=b37,length=39939>
+##contig=<ID=GL000239.1,assembly=b37,length=33824>
+##contig=<ID=GL000240.1,assembly=b37,length=41933>
+##contig=<ID=GL000241.1,assembly=b37,length=42152>
+##contig=<ID=GL000242.1,assembly=b37,length=43523>
+##contig=<ID=GL000243.1,assembly=b37,length=43341>
+##contig=<ID=GL000244.1,assembly=b37,length=39929>
+##contig=<ID=GL000245.1,assembly=b37,length=36651>
+##contig=<ID=GL000246.1,assembly=b37,length=38154>
+##contig=<ID=GL000247.1,assembly=b37,length=36422>
+##contig=<ID=GL000248.1,assembly=b37,length=39786>
+##contig=<ID=GL000249.1,assembly=b37,length=38502>
+##contig=<ID=MT,assembly=b37,length=16569>
+##contig=<ID=X,assembly=b37,length=155270560>
+##contig=<ID=Y,assembly=b37,length=59373566>
+##reference=file:///target/gpfs2/gcc/resources/hg19/indices/human_g1k_v37.fa
+##source=SelectVariants
+##INFO=<ID=GTC,Number=G,Type=Integer,Description="GenoType Counts. For each ALT allele in the same order as listed = 0/0,0/1,1/1,0/2,1/2,2/2,0/3,1/3,2/3,3/3,etc. Phasing is ignored; hence 1/0, 0|1 and 1|0 are all counted as 0/1. When one or more alleles is not called for a genotype in a specific sample (./., ./0, ./1, ./2, etc.), that sample's genotype is completely discarded for calculating GTC.">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
+20	60309	.	G	T	991.76	PASS	AC=4;AN=996;GTC=494,4,0
+20	60573	.	T	C	124.17	PASS	AC=1;AN=996;GTC=497,1,0
+20	60828	.	T	G	807.71	PASS	AC=6;AN=996;GTC=492,6,0
+20	61098	rs6078030	C	T	51254.56	PASS	AC=225;AN=996;GTC=304,163,31
+20	61270	.	A	C	2414.84	PASS	AC=20;AN=992;GTC=476,20,0
+20	61289	.	A	C	419.41	TruthSensitivityTranche99.70to99.80	AC=71;AN=960;GTC=411,67,2
+20	61682	.	C	T	12.27	PASS	AC=1;AN=996;GTC=497,1,0
diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index b3fcf92..f1824fa 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -236,6 +236,18 @@ def test_writer(self):
             assert not line.endswith('\t')
 
 
+class TestGoNL(unittest.TestCase):
+
+    def testParse(self):
+        reader = vcf.Reader(fh('gonl.chr20.release4.gtc.vcf'))
+        for _ in reader:
+            pass
+
+    def test_contig_line(self):
+        reader = vcf.Reader(fh('gonl.chr20.release4.gtc.vcf'))
+        self.assertEqual(reader.contigs['1'].length, 249250621)
+
+
 class TestGatkOutputWriter(unittest.TestCase):
 
     def testWrite(self):
@@ -897,6 +909,7 @@ def test_meta(self):
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestFilter))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(Test1kg))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(Test1kgSites))
+suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestGoNL))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestSamplesSpace))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestRecord))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestCall))

From 831c023afdd4034d9d8d599b361914d4baf12394 Mon Sep 17 00:00:00 2001
From: Martijn Vermaat <martijn@vermaat.name>
Date: Thu, 19 Sep 2013 21:37:17 +0200
Subject: [PATCH 056/168] Test if contig lines are output by writer

---
 vcf/test/test_vcf.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index c3a2e2f..9d0e204 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -271,6 +271,7 @@ def testWrite(self):
         self.assertEquals(reader.samples, reader2.samples)
         self.assertEquals(reader.formats, reader2.formats)
         self.assertEquals(reader.infos, reader2.infos)
+        self.assertEquals(reader.contigs, reader2.contigs)
 
         for l, r in zip(records, reader2):
             self.assertEquals(l.samples, r.samples)

From bb72c5b126b3799e7285d755ee87704ad49971dd Mon Sep 17 00:00:00 2001
From: Martijn Vermaat <martijn@vermaat.name>
Date: Thu, 19 Sep 2013 21:38:16 +0200
Subject: [PATCH 057/168] Output contig lines in writer

---
 vcf/parser.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/vcf/parser.py b/vcf/parser.py
index 20a12a8..aac102b 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -627,6 +627,8 @@ def __init__(self, stream, template, lineterminator="\r\n"):
             stream.write(two.format(key="FILTER", *line))
         for line in template.alts.itervalues():
             stream.write(two.format(key="ALT", *line))
+        for line in template.contigs.itervalues():
+            stream.write('##contig=<ID={0},length={1}>\n'.format(*line))
 
         self._write_header()
 

From c4c69255f95d8776909a6edc54c005e6c7448eb8 Mon Sep 17 00:00:00 2001
From: Martijn Vermaat <martijn@vermaat.name>
Date: Fri, 20 Sep 2013 12:10:59 +0200
Subject: [PATCH 058/168] Test parsing and writing INFO with type Character

---
 vcf/test/info-type-character.vcf |  8 ++++++++
 vcf/test/test_vcf.py             | 25 +++++++++++++++++++++++++
 2 files changed, 33 insertions(+)
 create mode 100644 vcf/test/info-type-character.vcf

diff --git a/vcf/test/info-type-character.vcf b/vcf/test/info-type-character.vcf
new file mode 100644
index 0000000..77b24a7
--- /dev/null
+++ b/vcf/test/info-type-character.vcf
@@ -0,0 +1,8 @@
+##fileformat=VCFv4.1
+##INFO=<ID=FLOAT_1,Number=1,Type=Float,Description="A floating point value">
+##INFO=<ID=CHAR_1,Number=1,Type=Character,Description="A character value">
+##INFO=<ID=FLOAT_N,Number=.,Type=Float,Description="Floating point values">
+##INFO=<ID=CHAR_N,Number=.,Type=Character,Description="Character values">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	Sample
+chr1	100	id1	G	A	.	.	FLOAT_1=123.456;CHAR_1=Y;FLOAT_N=123.456;CHAR_N=Y	GT	0/1
diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index 9d0e204..bcc39a9 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -248,6 +248,31 @@ def test_contig_line(self):
         self.assertEqual(reader.contigs['1'].length, 249250621)
 
 
+class TestInfoTypeCharacter(unittest.TestCase):
+    def test_parse(self):
+        reader = vcf.Reader(fh('info-type-character.vcf'))
+        record = next(reader)
+        self.assertEqual(record.INFO['FLOAT_1'], 123.456)
+        self.assertEqual(record.INFO['CHAR_1'], 'Y')
+        self.assertEqual(record.INFO['FLOAT_N'], [123.456])
+        self.assertEqual(record.INFO['CHAR_N'], ['Y'])
+
+    def test_write(self):
+        reader = vcf.Reader(fh('info-type-character.vcf'))
+        out = StringIO()
+        writer = vcf.Writer(out, reader)
+
+        records = list(reader)
+
+        for record in records:
+            writer.write_record(record)
+        out.seek(0)
+        reader2 = vcf.Reader(out)
+
+        for l, r in zip(records, reader2):
+            self.assertEquals(l.INFO, r.INFO)
+
+
 class TestGatkOutputWriter(unittest.TestCase):
 
     def testWrite(self):

From e3fc03a5819aa67943ae53a1913cdc5674aaa43a Mon Sep 17 00:00:00 2001
From: Martijn Vermaat <martijn@vermaat.name>
Date: Fri, 20 Sep 2013 12:12:01 +0200
Subject: [PATCH 059/168] Fix parsing INFO lines with type Character

This fixes GitHub issue #120 (thanks @AndrewUzilov).
---
 vcf/parser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vcf/parser.py b/vcf/parser.py
index aac102b..0aeaeb7 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -379,7 +379,7 @@ def _parse_info(self, info_str):
                 val = self._map(float, vals)
             elif entry_type == 'Flag':
                 val = True
-            elif entry_type == 'String':
+            elif entry_type in ('String', 'Character'):
                 try:
                     vals = entry[1].split(',') # commas are reserved characters indicating multiple values
                     val = self._map(str, vals)

From 58ef505e72b88f082bdfb98f3cdebe69db82becf Mon Sep 17 00:00:00 2001
From: Martijn Vermaat <martijn@vermaat.name>
Date: Tue, 24 Sep 2013 10:55:26 +0200
Subject: [PATCH 060/168] Add TestInfoTypeCharacter to test suite

---
 vcf/test/test_vcf.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index bcc39a9..9b18baf 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -942,6 +942,7 @@ def test_meta(self):
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestSamtoolsOutput))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestBcfToolsOutput))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestGatkOutputWriter))
+suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestInfoTypeCharacter))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestBcfToolsOutputWriter))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestWriterDictionaryMeta))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestTabix))

From 60398055dd86caf4d17d11a1be32faa6a99b22bc Mon Sep 17 00:00:00 2001
From: Peter Krusche <pkrusche@illumina.com>
Date: Tue, 5 Nov 2013 17:18:28 +0000
Subject: [PATCH 061/168] Fixed exception when reading single breakends

---
 vcf/model.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/vcf/model.py b/vcf/model.py
index a975a82..54794fc 100644
--- a/vcf/model.py
+++ b/vcf/model.py
@@ -473,9 +473,15 @@ class _Breakend(_AltRecord):
     def __init__(self, chr, pos, orientation, remoteOrientation, connectingSequence, withinMainAssembly, **kwargs):
         super(_Breakend, self).__init__(type="BND", **kwargs)
         #: The chromosome of breakend's mate.
-        self.chr = str(chr)
+        if chr is not None:
+            self.chr = str(chr)
+        else:
+            self.chr = None  # Single breakend
         #: The coordinate of breakend's mate.
-        self.pos = int(pos)
+        if pos is not None:
+            self.pos = int(pos)
+        else:
+            self.pos = None
         #: The orientation of breakend's mate. If the sequence 3' of the breakend's mate is connected, True, else if the sequence 5' of the breakend's mate is connected, False.
         self.remoteOrientation = remoteOrientation
         #: If the breakend mate is within the assembly, True, else False if the breakend mate is on a contig in an ancillary assembly file.

From 4892aabf96568bb72b0f3c41c1a7d5d88a8c81c4 Mon Sep 17 00:00:00 2001
From: Martijn Vermaat <martijn@vermaat.name>
Date: Sat, 16 Nov 2013 21:06:42 +0100
Subject: [PATCH 062/168] Do not maintain the order of INFO fields within
 records

Using an ordinary dict instead of an OrderedDict for the INFO fields makes
parsing faster. The INFO fields are sorted by the VCF writer where all fields
defined in the VCF header go first and in the same order, followed by the
remaining fields in alphabetical order. Note that this makes writing slower.

We lose two things:

1. Getting the INFO fields in original order from a record when using PyVCF as
   a library (but I don't think most users are expecting this anyway).
2. Preserving the original order of the INFO fields when writing (but the
   order is predictable).

The following are some simple benchmarks, starting with just parsing:

1. Without this change (using OrderedDict):

       In [1]: %timeit list(vcf.Reader(open('vcf/test/1kg.sites.vcf')))
       100 loops, best of 3: 15 ms per loop

2. With this change (using dict):

       In [1]: %timeit list(vcf.Reader(open('vcf/test/1kg.sites.vcf')))
       100 loops, best of 3: 10 ms per loop

Now parsing the same file and writing it back to VCF:

1. Without this change (using OrderedDict, no sorting):

       In [1]: %%timeit
          ...: reader = vcf.Reader(open('vcf/test/1kg.sites.vcf'))
          ...: writer = vcf.Writer(open(os.devnull, 'w'), reader)
          ...: for record in reader:
          ...:     writer.write_record(record)
          ...:
       10 loops, best of 3: 22.7 ms per loop

2. With half this change (using dict, no sorting):

       In [1]: %%timeit
          ...: reader = vcf.Reader(open('vcf/test/1kg.sites.vcf'))
          ...: writer = vcf.Writer(open(os.devnull, 'w'), reader)
          ...: for record in reader:
          ...:     writer.write_record(record)
          ...:
       100 loops, best of 3: 16.5 ms per loop

3. With this change (using dict, sorting during write):

       In [6]: %%timeit
          ...: reader = vcf.Reader(open('vcf/test/1kg.sites.vcf'))
          ...: writer = vcf.Writer(open(os.devnull, 'w'), reader)
          ...: for record in reader:
          ...:     writer.write_record(record)
          ...:
       100 loops, best of 3: 17.7 ms per loop

Fixes GitHub issue #96.
---
 vcf/parser.py        | 14 ++++++++++++--
 vcf/test/test_vcf.py | 37 +++++++++++++++++++++++++++++++++++++
 2 files changed, 49 insertions(+), 2 deletions(-)

diff --git a/vcf/parser.py b/vcf/parser.py
index 0aeaeb7..5706ccf 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -350,7 +350,7 @@ def _parse_info(self, info_str):
             return {}
 
         entries = info_str.split(';')
-        retdict = OrderedDict()
+        retdict = {}
 
         for entry in entries:
             entry = entry.split('=')
@@ -606,6 +606,12 @@ def __init__(self, stream, template, lineterminator="\r\n"):
         self.template = template
         self.stream = stream
 
+        # Order keys for INFO fields defined in the header (undefined fields
+        # get a maximum key).
+        self.info_order = collections.defaultdict(
+            lambda: len(template.infos),
+            dict(zip(template.infos.iterkeys(), itertools.count())))
+
         two = '##{key}=<ID={0},Description="{1}">\n'
         four = '##{key}=<ID={0},Number={num},Type={2},Description="{3}">\n'
         _num = self._fix_field_count
@@ -681,7 +687,11 @@ def _format_filter(self, flt):
     def _format_info(self, info):
         if not info:
             return '.'
-        return ';'.join([self._stringify_pair(x,y) for x, y in info.iteritems()])
+        def order_key(field):
+            # Order by header definition first, alphabetically second.
+            return self.info_order[field], field
+        return ';'.join(self._stringify_pair(f, info[f]) for f in
+                        sorted(info, key=order_key))
 
     def _format_sample(self, fmt, sample):
         try:
diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index 9b18baf..730e975 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -248,6 +248,43 @@ def test_contig_line(self):
         self.assertEqual(reader.contigs['1'].length, 249250621)
 
 
+class TestInfoOrder(unittest.TestCase):
+
+    def _assert_order(self, definitions, fields):
+        """
+        Elements common to both lists should be in the same order. Elements
+        only in `fields` should be last and in alphabetical order.
+        """
+        used_definitions = [d for d in definitions if d in fields]
+        self.assertEqual(used_definitions, fields[:len(used_definitions)])
+        self.assertEqual(fields[len(used_definitions):],
+                         sorted(fields[len(used_definitions):]))
+
+    def test_writer(self):
+        """
+        Order of INFO fields should be compatible with the order of their
+        definition in the header and undefined fields should be last and in
+        alphabetical order.
+        """
+        reader = vcf.Reader(fh('1kg.sites.vcf', 'r'))
+        out = StringIO()
+        writer = vcf.Writer(out, reader, lineterminator='\n')
+
+        for record in reader:
+            writer.write_record(record)
+        out.seek(0)
+        out_str = out.getvalue()
+
+        definitions = []
+        for line in out_str.split('\n'):
+            if line.startswith('##INFO='):
+                definitions.append(line.split('ID=')[1].split(',')[0])
+            if not line or line.startswith('#'):
+                continue
+            fields = [f.split('=')[0] for f in line.split('\t')[7].split(';')]
+            self._assert_order(definitions, fields)
+
+
 class TestInfoTypeCharacter(unittest.TestCase):
     def test_parse(self):
         reader = vcf.Reader(fh('info-type-character.vcf'))

From 0bd567cb65e8be44287a2cc02944c49c7cec165e Mon Sep 17 00:00:00 2001
From: James Boocock <sfk2001@gmail.com>
Date: Mon, 25 Nov 2013 13:20:10 +1300
Subject: [PATCH 063/168] Fixed tox.ini error regarding duplicate test section.

  File "/usr/local/bin/tox", line 9, in <module>
    load_entry_point('tox==1.6.1', 'console_scripts', 'tox')()
  File "/usr/local/lib/python2.7/dist-packages/tox/_cmdline.py", line
25, in main
    config = parseconfig(args, 'tox')
  File "/usr/local/lib/python2.7/dist-packages/tox/_config.py", line 44,
in parseconfig
    parseini(config, inipath)
  File "/usr/local/lib/python2.7/dist-packages/tox/_config.py", line
187, in __init__
    self._cfg = py.iniconfig.IniConfig(config.toxinipath)
  File "/usr/local/lib/python2.7/dist-packages/py/_iniconfig.py", line
67, in __init__
    self._raise(lineno, 'duplicate section %r'%(section, ))
  File "/usr/local/lib/python2.7/dist-packages/py/_iniconfig.py", line
75, in _raise
    raise ParseError(self.path, lineno, msg)
py._iniconfig.ParseError:
/home/smilefreak/MerrimanSelectionPipeline/PyVCF/tox.ini:30: duplicate
section 'testenv:py32'
---
 tox.ini | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tox.ini b/tox.ini
index 52e1085..8a56c17 100644
--- a/tox.ini
+++ b/tox.ini
@@ -27,6 +27,6 @@ deps =
 deps = 
     cython
 
-[testenv:py32]
+[testenv:py33]
 deps = 
     cython

From e50d7506196768e61220cea6376bfb544785f260 Mon Sep 17 00:00:00 2001
From: Martijn Vermaat <martijn@vermaat.name>
Date: Mon, 25 Nov 2013 11:35:10 +0100
Subject: [PATCH 064/168] Fix incorrect and missing reserved INFO/FORMAT fields

The reserved HOMSEQ INFO field was incorrectly defined as Integer and some
other fields listed in the VCF spec were missing.

Fixes #130
---
 vcf/parser.py | 33 +++++++++++++++++++--------------
 1 file changed, 19 insertions(+), 14 deletions(-)

diff --git a/vcf/parser.py b/vcf/parser.py
index 5706ccf..8996d37 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -29,25 +29,30 @@
 RESERVED_INFO = {
     'AA': 'String', 'AC': 'Integer', 'AF': 'Float', 'AN': 'Integer',
     'BQ': 'Float', 'CIGAR': 'String', 'DB': 'Flag', 'DP': 'Integer',
-    'END': 'Integer', 'H2': 'Flag', 'MQ': 'Float', 'MQ0': 'Integer',
-    'NS': 'Integer', 'SB': 'String', 'SOMATIC': 'Flag', 'VALIDATED': 'Flag',
-
-    # VCF 4.1 Additions
-    'IMPRECISE':'Flag', 'NOVEL':'Flag', 'END':'Integer', 'SVTYPE':'String',
-    'CIPOS':'Integer','CIEND':'Integer','HOMLEN':'Integer','HOMSEQ':'Integer',
-    'BKPTID':'String','MEINFO':'String','METRANS':'String','DGVID':'String',
-    'DBVARID':'String','MATEID':'String','PARID':'String','EVENT':'String',
-    'CILEN':'Integer','CN':'Integer','CNADJ':'Integer','CICN':'Integer',
-    'CICNADJ':'Integer'
+    'END': 'Integer', 'H2': 'Flag', 'H3': 'Flag', 'MQ': 'Float',
+    'MQ0': 'Integer', 'NS': 'Integer', 'SB': 'String', 'SOMATIC': 'Flag',
+    'VALIDATED': 'Flag', '1000G': 'Flag',
+
+    # Keys used for structural variants
+    'IMPRECISE': 'Flag', 'NOVEL': 'Flag', 'SVTYPE': 'String',
+    'SVLEN': 'Integer', 'CIPOS': 'Integer', 'CIEND': 'Integer',
+    'HOMLEN': 'Integer', 'HOMSEQ': 'String', 'BKPTID': 'String',
+    'MEINFO': 'String', 'METRANS': 'String', 'DGVID': 'String',
+    'DBVARID': 'String', 'DBRIPID': 'String', 'MATEID': 'String',
+    'PARID': 'String', 'EVENT': 'String', 'CILEN': 'Integer',
+    'DPADJ': 'Integer', 'CN': 'Integer', 'CNADJ': 'Integer',
+    'CICN': 'Integer', 'CICNADJ': 'Integer'
 }
 
 RESERVED_FORMAT = {
     'GT': 'String', 'DP': 'Integer', 'FT': 'String', 'GL': 'Float',
-    'GQ': 'Float', 'HQ': 'Float',
+    'GLE': 'String', 'PL': 'Integer', 'GP': 'Float', 'GQ': 'Integer',
+    'HQ': 'Integer', 'PS': 'Integer', 'PQ': 'Integer', 'EC': 'Integer',
+    'MQ': 'Integer',
 
-    # VCF 4.1 Additions
-    'CN':'Integer','CNQ':'Float','CNL':'Float','NQ':'Integer','HAP':'Integer',
-    'AHAP':'Integer'
+    # Keys used for structural variants
+    'CN': 'Integer', 'CNQ': 'Float', 'CNL': 'Float', 'NQ': 'Integer',
+    'HAP': 'Integer', 'AHAP': 'Integer'
 }
 
 # Spec is a bit weak on which metadata lines are singular, like fileformat

From cfd7091f826b3ede56b91e3d686f34d5e757827d Mon Sep 17 00:00:00 2001
From: mgymrek <mgymrek@mit.edu>
Date: Fri, 29 Nov 2013 15:37:25 -0500
Subject: [PATCH 065/168] Added method to return alt. allele frequencies when
 there is more than one alternate allele

---
 vcf/model.py         | 19 ++++++++++++++++---
 vcf/test/test_vcf.py |  4 ++--
 2 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/vcf/model.py b/vcf/model.py
index 54794fc..370e586 100644
--- a/vcf/model.py
+++ b/vcf/model.py
@@ -208,17 +208,30 @@ def num_unknown(self):
     @property
     def aaf(self):
         """ The allele frequency of the alternate allele.
-           NOTE 1: Punt if more than one alternate allele.
+           NOTE 1: Return a list of frequencies if more than one alternate allele
            NOTE 2: Denominator calc'ed from _called_ genotypes.
         """
-        # skip if more than one alternate allele. assumes bi-allelic
         if len(self.ALT) > 1:
-            return None
+            return self.multi_aaf
         het = self.num_het
         hom_alt = self.num_hom_alt
         num_chroms = float(2.0 * self.num_called)
         return float(het + 2 * hom_alt) / float(num_chroms)
 
+    @property
+    def multi_aaf(self):
+        """
+        The allele frequency of alternate alleles for multi-allelic loci.
+        Return a list of frequencies for each alternate allele.
+        """
+        num_chroms = 2.0 * self.num_called
+        allele_counts = collections.defaultdict(int)
+        for s in self.samples:
+            if s.gt_type is not None:
+                allele_counts[s.gt_alleles[0]] += 1
+                allele_counts[s.gt_alleles[1]] += 1
+        return [allele_counts[str(i)]/num_chroms for i in range(1, len(self.ALT)+1)]
+
     @property
     def nucl_diversity(self):
         """
diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index 730e975..a882d5b 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -449,11 +449,11 @@ def test_aaf(self):
             if var.POS == 17330:
                 self.assertEqual(1.0/6.0, aaf)
             if var.POS == 1110696:
-                self.assertEqual(None, aaf)
+                self.assertEqual([2.0/6.0, 4.0/6.0], aaf)
             if var.POS == 1230237:
                 self.assertEqual(0.0/6.0, aaf)
             elif var.POS == 1234567:
-                self.assertEqual(None, aaf)
+                self.assertEqual([2.0/4.0, 1.0/4.0], aaf)
 
     def test_pi(self):
         reader = vcf.Reader(fh('example-4.0.vcf'))

From 50a2fcb2255f4d7a613ae97d8ed1269ad656b7f3 Mon Sep 17 00:00:00 2001
From: mgymrek <mgymrek@mit.edu>
Date: Mon, 2 Dec 2013 13:55:12 -0500
Subject: [PATCH 066/168] made aaf a list, changed to use Counter

---
 vcf/model.py         | 15 +++++++--------
 vcf/test/test_vcf.py |  6 +++---
 2 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/vcf/model.py b/vcf/model.py
index 370e586..1cf94a8 100644
--- a/vcf/model.py
+++ b/vcf/model.py
@@ -207,16 +207,15 @@ def num_unknown(self):
 
     @property
     def aaf(self):
-        """ The allele frequency of the alternate allele.
-           NOTE 1: Return a list of frequencies if more than one alternate allele
-           NOTE 2: Denominator calc'ed from _called_ genotypes.
+        """ A list of allele frequencies of alternate alleles.
+           NOTE: Denominator calc'ed from _called_ genotypes.
         """
         if len(self.ALT) > 1:
             return self.multi_aaf
         het = self.num_het
         hom_alt = self.num_hom_alt
         num_chroms = float(2.0 * self.num_called)
-        return float(het + 2 * hom_alt) / float(num_chroms)
+        return [float(het + 2 * hom_alt) / float(num_chroms)]
 
     @property
     def multi_aaf(self):
@@ -225,11 +224,11 @@ def multi_aaf(self):
         Return a list of frequencies for each alternate allele.
         """
         num_chroms = 2.0 * self.num_called
-        allele_counts = collections.defaultdict(int)
+        allele_counts = collections.Counter()
         for s in self.samples:
             if s.gt_type is not None:
-                allele_counts[s.gt_alleles[0]] += 1
-                allele_counts[s.gt_alleles[1]] += 1
+                allele_counts.update([s.gt_alleles[0]])
+                allele_counts.update([s.gt_alleles[1]])
         return [allele_counts[str(i)]/num_chroms for i in range(1, len(self.ALT)+1)]
 
     @property
@@ -247,7 +246,7 @@ def nucl_diversity(self):
         # skip if more than one alternate allele. assumes bi-allelic
         if len(self.ALT) > 1:
             return None
-        p = self.aaf
+        p = self.aaf[0]
         q = 1.0 - p
         num_chroms = float(2.0 * self.num_called)
         return float(num_chroms / (num_chroms - 1.0)) * (2.0 * p * q)
diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index a882d5b..5a3c3ec 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -445,13 +445,13 @@ def test_aaf(self):
         for var in reader:
             aaf = var.aaf
             if var.POS == 14370:
-                self.assertEqual(3.0/6.0, aaf)
+                self.assertEqual([3.0/6.0], aaf)
             if var.POS == 17330:
-                self.assertEqual(1.0/6.0, aaf)
+                self.assertEqual([1.0/6.0], aaf)
             if var.POS == 1110696:
                 self.assertEqual([2.0/6.0, 4.0/6.0], aaf)
             if var.POS == 1230237:
-                self.assertEqual(0.0/6.0, aaf)
+                self.assertEqual([0.0/6.0], aaf)
             elif var.POS == 1234567:
                 self.assertEqual([2.0/4.0, 1.0/4.0], aaf)
 

From 36b4b68c6b76b219509cec2efb45cf3dd85ff7eb Mon Sep 17 00:00:00 2001
From: mgymrek <mgymrek@mit.edu>
Date: Mon, 2 Dec 2013 14:03:54 -0500
Subject: [PATCH 067/168] Changed aaf to use collections.Counter. Made aaf
 return a list with frequencies for all alternate alleles

---
 README.rst      |  2 +-
 vcf/__init__.py |  2 +-
 vcf/model.py    | 13 -------------
 3 files changed, 2 insertions(+), 15 deletions(-)

diff --git a/README.rst b/README.rst
index 52bd780..86e9fda 100644
--- a/README.rst
+++ b/README.rst
@@ -66,7 +66,7 @@ examine properties of interest::
     >>> print record.num_hom_ref, record.num_het, record.num_hom_alt
     1 1 1
     >>> print record.nucl_diversity, record.aaf
-    0.6 0.5
+    0.6 [0.5]
     >>> print record.get_hets()
     [Call(sample=NA00002, CallData(GT=1|0, GQ=48, DP=8, HQ=[51, 51]))]
     >>> print record.is_snp, record.is_indel, record.is_transition, record.is_deletion
diff --git a/vcf/__init__.py b/vcf/__init__.py
index 3e49b09..13420df 100644
--- a/vcf/__init__.py
+++ b/vcf/__init__.py
@@ -67,7 +67,7 @@
     >>> print record.num_hom_ref, record.num_het, record.num_hom_alt
     1 1 1
     >>> print record.nucl_diversity, record.aaf
-    0.6 0.5
+    0.6 [0.5]
     >>> print record.get_hets()
     [Call(sample=NA00002, CallData(GT=1|0, GQ=48, DP=8, HQ=[51, 51]))]
     >>> print record.is_snp, record.is_indel, record.is_transition, record.is_deletion
diff --git a/vcf/model.py b/vcf/model.py
index 1cf94a8..dd60df5 100644
--- a/vcf/model.py
+++ b/vcf/model.py
@@ -210,19 +210,6 @@ def aaf(self):
         """ A list of allele frequencies of alternate alleles.
            NOTE: Denominator calc'ed from _called_ genotypes.
         """
-        if len(self.ALT) > 1:
-            return self.multi_aaf
-        het = self.num_het
-        hom_alt = self.num_hom_alt
-        num_chroms = float(2.0 * self.num_called)
-        return [float(het + 2 * hom_alt) / float(num_chroms)]
-
-    @property
-    def multi_aaf(self):
-        """
-        The allele frequency of alternate alleles for multi-allelic loci.
-        Return a list of frequencies for each alternate allele.
-        """
         num_chroms = 2.0 * self.num_called
         allele_counts = collections.Counter()
         for s in self.samples:

From 5497120af5e50f2031135da21afd7c813300d8e5 Mon Sep 17 00:00:00 2001
From: bow <bow@bow.web.id>
Date: Tue, 3 Dec 2013 11:55:38 +0100
Subject: [PATCH 068/168] Add custom equality function as walk_together
 argument

---
 vcf/test/test_vcf.py      | 36 ++++++++++++++++++++++++++++++++++++
 vcf/test/walk_refcall.vcf | 22 ++++++++++++++++++++++
 vcf/utils.py              | 20 ++++++++++++++------
 3 files changed, 72 insertions(+), 6 deletions(-)
 create mode 100644 vcf/test/walk_refcall.vcf

diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index 730e975..daba688 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -950,6 +950,42 @@ def test_walk(self):
                 assert recs[0] is not None
                 assert recs[1] is not None
 
+        # case with working custom equality function
+
+        # without custom function, exception should be raised
+
+        reader1 = vcf.Reader(fh('example-4.0.vcf'))
+        reader2 = vcf.Reader(fh('walk_refcall.vcf'))
+        self.assertRaisesRegexp(AttributeError, "'NoneType' object has no "
+                "attribute 'type'", next, utils.walk_together(reader1, reader2))
+
+        # with custom function, iteration works
+
+        reader1 = vcf.Reader(fh('example-4.0.vcf'))
+        reader2 = vcf.Reader(fh('walk_refcall.vcf'))
+
+        def custom_eq(rec1, rec2):
+            # check for equality only on CHROM, POS, and REF
+            if rec1 is None or rec2 is None:
+                return False
+            return rec1.CHROM == rec2.CHROM and rec1.POS == rec2.POS and \
+                    rec1.REF == rec2.REF
+
+        nrecs, ncomps = 0, 0
+        for x in utils.walk_together(reader1, reader2, eq_func=custom_eq):
+            assert len(x) == 2
+            # avoid assert() when one record is None
+            if x[0] is not None and x[1] is not None:
+                assert (custom_eq(x[0], x[1]) and custom_eq(x[1], x[0]))
+                ncomps += 1
+            # still increment counter to ensure iteration is finished for all
+            # records
+            nrecs += 1
+        # check number of records total
+        assert nrecs == 5
+        # check how many records found in all files
+        assert ncomps == 4
+
     def test_trim(self):
         tests = [('TAA GAA', 'T G'),
                  ('TA TA', 'T T'),
diff --git a/vcf/test/walk_refcall.vcf b/vcf/test/walk_refcall.vcf
new file mode 100644
index 0000000..e93aeff
--- /dev/null
+++ b/vcf/test/walk_refcall.vcf
@@ -0,0 +1,22 @@
+##fileformat=VCFv4.0
+##fileDate=20090805
+##source=myImputationProgramV3.1
+##reference=1000GenomesPilot-NCBI36
+##phasing=partial
+##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##INFO=<ID=AF,Number=.,Type=Float,Description="Allele Frequency">
+##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele">
+##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP membership, build 129">
+##INFO=<ID=H2,Number=0,Type=Flag,Description="HapMap2 membership">
+##FILTER=<ID=q10,Description="Quality below 10">
+##FILTER=<ID=s50,Description="Less than 50% of samples have data">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NA00001	NA00002	NA00003
+20	14370	rs6054257	G	.	29	PASS	NS=3;DP=14;AF=0.5;DB;H2	GT:GQ:DP:HQ	0|0:48:1:51,51	0|0:48:8:51,51	0/0:43:5:.,.
+20	17330	.	T	.	3.0	q10	NS=3;DP=11;AF=0.017	GT:GQ:DP:HQ	0|0:49:3:58,50	0|0:3:5:65,3	0/0:41:3
+20	1110696	rs6040355	A	.	1e+03	PASS	NS=2;DP=10;AF=0.333,0.667;AA=T;DB	GT:GQ:DP:HQ	0|0:21:6:23,27	0|0:2:0:18,2	0/0:35:4
+20	1230237	.	T	.	47	PASS	NS=3;DP=13;AA=T	GT:GQ:DP:HQ	0|0:54:7:56,60	0|0:48:4:51,51	0/0:61:2
diff --git a/vcf/utils.py b/vcf/utils.py
index ec6f686..0ab04ca 100644
--- a/vcf/utils.py
+++ b/vcf/utils.py
@@ -2,14 +2,25 @@
 Utilities for VCF files.
 """
 
+import operator
 
-def walk_together(*readers):
+
+def walk_together(*readers, **kwargs):
     """ Simultaneously iteratate two or more VCF readers and return
         lists of concurrent records from each
         reader, with None if no record present.  Caller must check the
         inputs are sorted in the same way and use the same reference
         otherwise behaviour is undefined.
     """
+    # if defined, custom equality functions must take the same arguments
+    # as operator.eq
+    if 'eq_func' in kwargs:
+        eq_func = kwargs['eq_func']
+    # by default, we use the equality operator (==), which compares
+    # equality in CHROM, POS, REF, and ALT
+    else:
+        eq_func = operator.eq
+
     # if one of the VCFs has no records, StopIteration is
     # raised immediately, so we need to check for that and
     # deal appropriately
@@ -23,15 +34,12 @@ def walk_together(*readers):
     while True:
         min_next = min([x for x in nexts if x is not None])
 
-        # this line uses equality on Records, which checks the ALTs
-        # not sure what to do with records that have overlapping but different
-        # variation
-        yield [x if x is None or x == min_next else None for x in nexts]
+        yield [x if x is None or eq_func(x, min_next) else None for x in nexts]
 
         # update nexts that we just yielded
         for i, n in enumerate(nexts):
 
-            if n is not None and n == min_next:
+            if n is not None and eq_func(n, min_next):
                 try:
                     nexts[i] = readers[i].next()
                 except StopIteration:

From 226f56af4d81c5a488a28e619f1974e32a1ebf16 Mon Sep 17 00:00:00 2001
From: Martijn Vermaat <martijn@vermaat.name>
Date: Tue, 3 Dec 2013 13:26:36 +0100
Subject: [PATCH 069/168] Add dependency on collections.Counter implementation
 for Python 2.6

As per my suggestion in PR jamescasbon#131, defaultdict(int) was changed
to Counter(). However, I didn't realize it was only added in Python 2.7
and we target Python 2.6.

This approach follows what we already did for collections.OrderedDict.
---
 .travis.yml  | 2 +-
 setup.py     | 7 +++++--
 tox.ini      | 5 +++--
 vcf/model.py | 7 ++++++-
 4 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index a1cb3a4..f54f5da 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -7,7 +7,7 @@ python:
   - "3.3"
   - "pypy"
 install:
-  - "if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install --use-mirrors cython && pip install --use-mirrors pysam argparse ordereddict; fi"
+  - "if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install --use-mirrors cython && pip install --use-mirrors pysam argparse counter ordereddict; fi"
   - "if [[ $TRAVIS_PYTHON_VERSION == '2.7' ]]; then pip install --use-mirrors cython && pip install --use-mirrors pysam; fi"
   - python setup.py install
 script: python setup.py test
diff --git a/setup.py b/setup.py
index bca3a0d..1ea709c 100644
--- a/setup.py
+++ b/setup.py
@@ -16,9 +16,12 @@
 except ImportError:
     requires.append('argparse')
 
-
+import collections
+try:
+    collections.Counter
+except AttributeError:
+    requires.append('counter')
 try:
-    import collections
     collections.OrderedDict
 except AttributeError:
     requires.append('ordereddict')
diff --git a/tox.ini b/tox.ini
index 8a56c17..0c07a88 100644
--- a/tox.ini
+++ b/tox.ini
@@ -14,6 +14,7 @@ commands =
 [testenv:py26]
 deps =
     argparse
+    counter
     ordereddict
     cython
     pysam
@@ -24,9 +25,9 @@ deps =
     cython
 
 [testenv:py32]
-deps = 
+deps =
     cython
 
 [testenv:py33]
-deps = 
+deps =
     cython
diff --git a/vcf/model.py b/vcf/model.py
index dd60df5..dc34319 100644
--- a/vcf/model.py
+++ b/vcf/model.py
@@ -2,6 +2,11 @@
 import collections
 import sys
 
+try:
+    from collections import Counter
+except ImportError:
+    from counter import Counter
+
 
 class _Call(object):
     """ A genotype call, a cell entry in a VCF file"""
@@ -211,7 +216,7 @@ def aaf(self):
            NOTE: Denominator calc'ed from _called_ genotypes.
         """
         num_chroms = 2.0 * self.num_called
-        allele_counts = collections.Counter()
+        allele_counts = Counter()
         for s in self.samples:
             if s.gt_type is not None:
                 allele_counts.update([s.gt_alleles[0]])

From 322c2121cc0175e6faa662648170028070b0d500 Mon Sep 17 00:00:00 2001
From: Martijn Vermaat <martijn@vermaat.name>
Date: Wed, 4 Dec 2013 14:35:25 +0100
Subject: [PATCH 070/168] Fix unit tests on Python 2.6 and add missing tests to
 the suite

---
 vcf/test/test_vcf.py | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index ba9a8a3..8fcd2a8 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -956,8 +956,8 @@ def test_walk(self):
 
         reader1 = vcf.Reader(fh('example-4.0.vcf'))
         reader2 = vcf.Reader(fh('walk_refcall.vcf'))
-        self.assertRaisesRegexp(AttributeError, "'NoneType' object has no "
-                "attribute 'type'", next, utils.walk_together(reader1, reader2))
+        self.assertRaises(AttributeError, next,
+                          utils.walk_together(reader1, reader2))
 
         # with custom function, iteration works
 
@@ -1009,24 +1009,26 @@ def test_meta(self):
         assert reader.metadata['GATKCommandLine'][1]['CommandLineOptions'] == '"analysis_type=VariantAnnotator annotation=[HomopolymerRun, VariantType, TandemRepeatAnnotator]"'
 
 
-
+suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestVcfSpecs))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestGatkOutput))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestFreebayesOutput))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestSamtoolsOutput))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestBcfToolsOutput))
-suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestGatkOutputWriter))
-suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestInfoTypeCharacter))
-suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestBcfToolsOutputWriter))
-suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestWriterDictionaryMeta))
-suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestTabix))
-suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestOpenMethods))
-suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestFilter))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(Test1kg))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(Test1kgSites))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestGoNL))
+suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestInfoOrder))
+suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestInfoTypeCharacter))
+suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestGatkOutputWriter))
+suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestBcfToolsOutputWriter))
+suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestWriterDictionaryMeta))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestSamplesSpace))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestMixedFiltering))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestRecord))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestCall))
+suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestTabix))
+suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestOpenMethods))
+suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestFilter))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestRegression))
-suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestVcfSpecs))
+suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestUtils))
+suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestGATKMeta))

From b8c0af7ee8382297aaf41409a359410263df5f0c Mon Sep 17 00:00:00 2001
From: Martijn Vermaat <martijn@vermaat.name>
Date: Wed, 4 Dec 2013 14:38:34 +0100
Subject: [PATCH 071/168] Fix comparison of _Record objects on Python 3

---
 vcf/model.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/vcf/model.py b/vcf/model.py
index dc34319..09013dd 100644
--- a/vcf/model.py
+++ b/vcf/model.py
@@ -148,6 +148,11 @@ def __init__(self, CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO, FORMAT,
         self.samples = samples or []
         self._sample_indexes = sample_indexes
 
+    # For Python 2
+    def __cmp__(self, other):
+        return cmp((self.CHROM, self.POS), (other.CHROM, other.POS))
+
+    # For Python 3
     def __eq__(self, other):
         """ _Records are equal if they describe the same variant (same position, alleles) """
         return (self.CHROM == other.CHROM and
@@ -155,15 +160,16 @@ def __eq__(self, other):
                 self.REF == other.REF and
                 self.ALT == other.ALT)
 
+    # For Python 3
+    def __lt__(self, other):
+        return (self.CHROM, self.POS) < (other.CHROM, other.POS)
+
     def __iter__(self):
         return iter(self.samples)
 
     def __str__(self):
         return "Record(CHROM=%(CHROM)s, POS=%(POS)s, REF=%(REF)s, ALT=%(ALT)s)" % self.__dict__
 
-    def __cmp__(self, other):
-        return cmp((self.CHROM, self.POS), (other.CHROM, other.POS))
-
     def add_format(self, fmt):
         self.FORMAT = self.FORMAT + ':' + fmt
 

From 23d1fc0d663f764b955110bc79023cbbf0eb57af Mon Sep 17 00:00:00 2001
From: bow <bow@bow.web.id>
Date: Wed, 4 Dec 2013 16:27:07 +0100
Subject: [PATCH 072/168] Update __eq__ operators to return False for
 comparison with different types

---
 vcf/model.py | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/vcf/model.py b/vcf/model.py
index 09013dd..552e1c4 100644
--- a/vcf/model.py
+++ b/vcf/model.py
@@ -36,6 +36,8 @@ def __eq__(self, other):
         """ Two _Calls are equal if their _Records are equal
             and the samples and ``gt_type``s are the same
         """
+        if not isinstance(other, self.__class__):
+            return False
         return (self.site == other.site
                 and self.sample == other.sample
                 and self.gt_type == other.gt_type)
@@ -155,6 +157,10 @@ def __cmp__(self, other):
     # For Python 3
     def __eq__(self, other):
         """ _Records are equal if they describe the same variant (same position, alleles) """
+        # a _Record is never equal with a non-Record
+        # do the check here to avoid AttributeError (i.e. None does not have CHROM)
+        if not isinstance(other, self.__class__):
+            return False
         return (self.CHROM == other.CHROM and
                 self.POS == other.POS and
                 self.REF == other.REF and
@@ -447,6 +453,8 @@ def __str__(self):
         raise NotImplementedError
 
     def __eq__(self, other):
+        if not isinstance(other, self.__class__):
+            return False
         return self.type == other.type
 
 
@@ -473,8 +481,9 @@ def __len__(self):
     def __eq__(self, other):
         if isinstance(other, basestring):
             return self.sequence == other
-        else:
-            return super(_Substitution, self).__eq__(other) and self.sequence == other.sequence
+        elif not isinstance(other, self.__class__):
+            return False
+        return super(_Substitution, self).__eq__(other) and self.sequence == other.sequence
 
 
 class _Breakend(_AltRecord):
@@ -523,6 +532,8 @@ def __str__(self):
             return self.connectingSequence + remoteTag
 
     def __eq__(self, other):
+        if not isinstance(other, self.__class__):
+            return False
         return super(_Breakend, self).__eq__(other) \
                 and self.chr == other.chr \
                 and self.pos == other.pos \

From 1a103cd04d3f65640f82ae2ff1eb4c24ea7360b0 Mon Sep 17 00:00:00 2001
From: bow <bow@bow.web.id>
Date: Wed, 4 Dec 2013 16:34:46 +0100
Subject: [PATCH 073/168] Add tests for updated equality behavior

---
 vcf/test/test_vcf.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index 8fcd2a8..5bbe824 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -425,6 +425,11 @@ def test_num_calls(self):
                          var.num_het + var.num_unknown)
             self.assertEqual(len(var.samples), num_calls)
 
+    def test_dunder_eq(self):
+        rec = vcf.Reader(fh('example-4.0.vcf')).next()
+        self.assertFalse(rec == None)
+        self.assertFalse(None == rec)
+
     def test_call_rate(self):
         reader = vcf.Reader(fh('example-4.0.vcf'))
         for var in reader:
@@ -733,6 +738,13 @@ def test_pickle(self):
 
 class TestCall(unittest.TestCase):
 
+    def test_dunder_eq(self):
+        reader = vcf.Reader(fh('example-4.0.vcf'))
+        var = reader.next()
+        example_call = var.samples[0]
+        self.assertFalse(example_call == None)
+        self.assertFalse(None == example_call)
+
     def test_phased(self):
         reader = vcf.Reader(fh('example-4.0.vcf'))
         for var in reader:

From 8dcca20936c81a639893cf1eee5f8336d594acb7 Mon Sep 17 00:00:00 2001
From: bow <bow@bow.web.id>
Date: Wed, 4 Dec 2013 16:28:22 +0100
Subject: [PATCH 074/168] Update walk_together test to accommodate __eq__
 behavior change

---
 vcf/test/test_vcf.py | 45 ++++++++++++++++++++++++++++----------------
 1 file changed, 29 insertions(+), 16 deletions(-)

diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index 5bbe824..d3b6c64 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -964,17 +964,28 @@ def test_walk(self):
 
         # case with working custom equality function
 
-        # without custom function, exception should be raised
+        # without custom function, most records in these files
+        # are different since the default equality checks
+        # for ALT values
 
         reader1 = vcf.Reader(fh('example-4.0.vcf'))
         reader2 = vcf.Reader(fh('walk_refcall.vcf'))
-        self.assertRaises(AttributeError, next,
-                          utils.walk_together(reader1, reader2))
 
-        # with custom function, iteration works
+        # counters for distinct records and overlapping records
+        ndist_def, nover_def = 0, 0
+        for x in utils.walk_together(reader1, reader2):
+            assert len(x) == 2
+            if x[0] is not None and x[1] is not None:
+                assert (x[0] == x[1] and x[1] == x[0])
+                nover_def += 1
+            ndist_def += 1
+        # check how many overlapping records
+        assert nover_def == 1
+        # check how many distinct records
+        assert ndist_def == 8
 
-        reader1 = vcf.Reader(fh('example-4.0.vcf'))
-        reader2 = vcf.Reader(fh('walk_refcall.vcf'))
+        # with custom function that does not check ALT,
+        # we see more overlaps and less distinct records
 
         def custom_eq(rec1, rec2):
             # check for equality only on CHROM, POS, and REF
@@ -983,20 +994,22 @@ def custom_eq(rec1, rec2):
             return rec1.CHROM == rec2.CHROM and rec1.POS == rec2.POS and \
                     rec1.REF == rec2.REF
 
-        nrecs, ncomps = 0, 0
+        reader1 = vcf.Reader(fh('example-4.0.vcf'))
+        reader2 = vcf.Reader(fh('walk_refcall.vcf'))
+
+        ndist_cust, nover_cust = 0, 0
         for x in utils.walk_together(reader1, reader2, eq_func=custom_eq):
             assert len(x) == 2
-            # avoid assert() when one record is None
             if x[0] is not None and x[1] is not None:
                 assert (custom_eq(x[0], x[1]) and custom_eq(x[1], x[0]))
-                ncomps += 1
-            # still increment counter to ensure iteration is finished for all
-            # records
-            nrecs += 1
-        # check number of records total
-        assert nrecs == 5
-        # check how many records found in all files
-        assert ncomps == 4
+                nover_cust += 1
+            ndist_cust += 1
+        assert nover_cust == 4
+        assert ndist_cust == 5
+
+        # final check just to be absolutely sure
+        assert ndist_def != ndist_cust
+        assert nover_def != nover_cust
 
     def test_trim(self):
         tests = [('TAA GAA', 'T G'),

From d39ffa0737c4b3f478972aaf84753fc48be2da77 Mon Sep 17 00:00:00 2001
From: mgymrek <mgymrek@mit.edu>
Date: Tue, 10 Dec 2013 19:55:57 -0500
Subject: [PATCH 075/168] Adding method to compute heterozygosity for a site

---
 README.rst           |  4 ++--
 vcf/__init__.py      |  4 ++--
 vcf/model.py         | 12 ++++++++++++
 vcf/test/test_vcf.py | 15 +++++++++++++++
 4 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/README.rst b/README.rst
index 86e9fda..eed7808 100644
--- a/README.rst
+++ b/README.rst
@@ -65,8 +65,8 @@ examine properties of interest::
     3 1.0 0
     >>> print record.num_hom_ref, record.num_het, record.num_hom_alt
     1 1 1
-    >>> print record.nucl_diversity, record.aaf
-    0.6 [0.5]
+    >>> print record.nucl_diversity, record.aaf, record.heterozygosity
+    0.6 [0.5], 0.5
     >>> print record.get_hets()
     [Call(sample=NA00002, CallData(GT=1|0, GQ=48, DP=8, HQ=[51, 51]))]
     >>> print record.is_snp, record.is_indel, record.is_transition, record.is_deletion
diff --git a/vcf/__init__.py b/vcf/__init__.py
index 13420df..647106a 100644
--- a/vcf/__init__.py
+++ b/vcf/__init__.py
@@ -66,8 +66,8 @@
     3 1.0 0
     >>> print record.num_hom_ref, record.num_het, record.num_hom_alt
     1 1 1
-    >>> print record.nucl_diversity, record.aaf
-    0.6 [0.5]
+    >>> print record.nucl_diversity, record.aaf, record.heterozygosity
+    0.6 [0.5] 0.5
     >>> print record.get_hets()
     [Call(sample=NA00002, CallData(GT=1|0, GQ=48, DP=8, HQ=[51, 51]))]
     >>> print record.is_snp, record.is_indel, record.is_transition, record.is_deletion
diff --git a/vcf/model.py b/vcf/model.py
index 09013dd..b5a37e6 100644
--- a/vcf/model.py
+++ b/vcf/model.py
@@ -249,6 +249,18 @@ def nucl_diversity(self):
         num_chroms = float(2.0 * self.num_called)
         return float(num_chroms / (num_chroms - 1.0)) * (2.0 * p * q)
 
+    @property
+    def heterozygosity(self):
+        """
+        Heterozygosity of a site. Heterozygosity gives the probability that
+        two randomly chosen chromosomes from the population have different
+        alleles, giving a measurement of the degree of polymorphism in a population.
+
+        If there are i alleles with frequency p_i, H-1-sum_i(p_i^2)
+        """
+        allele_freqs = [1-sum(self.aaf)] + self.aaf
+        return 1 - sum(map(lambda x: x**2, allele_freqs))
+
     def get_hom_refs(self):
         """ The list of hom ref genotypes"""
         return [s for s in self.samples if s.gt_type == 0]
diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index 8fcd2a8..713656c 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -470,6 +470,21 @@ def test_pi(self):
             elif var.POS == 1234567:
                 self.assertEqual(None, pi)
 
+    def test_heterozygosity(self):
+        reader = vcf.Reader(fh('example-4.0.vcf'))
+        for var in reader:
+            het = var.heterozygosity
+            if var.POS == 14370:
+                self.assertEqual(0.5, het)
+            if var.POS == 17330:
+                self.assertEqual(1-((1.0/6)**2 + (5.0/6)**2), het)
+            if var.POS == 1110696:
+                self.assertEqual(4.0/9.0, het)
+            if var.POS == 1230237:
+                self.assertEqual(0.0, het)
+            elif var.POS == 1234567:
+                self.assertEqual(5.0/8.0, het)
+
     def test_is_snp(self):
         reader = vcf.Reader(fh('example-4.0.vcf'))
         for r in reader:

From cfade353527233e027b625f0d76da3db2e8f75ba Mon Sep 17 00:00:00 2001
From: mgymrek <mgymrek@mit.edu>
Date: Tue, 10 Dec 2013 19:57:59 -0500
Subject: [PATCH 076/168] added heterozygosity method, fixed typo in docstring

---
 vcf/model.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vcf/model.py b/vcf/model.py
index b5a37e6..5d5acd6 100644
--- a/vcf/model.py
+++ b/vcf/model.py
@@ -254,9 +254,9 @@ def heterozygosity(self):
         """
         Heterozygosity of a site. Heterozygosity gives the probability that
         two randomly chosen chromosomes from the population have different
-        alleles, giving a measurement of the degree of polymorphism in a population.
+        alleles, giving a measure of the degree of polymorphism in a population.
 
-        If there are i alleles with frequency p_i, H-1-sum_i(p_i^2)
+        If there are i alleles with frequency p_i, H=1-sum_i(p_i^2)
         """
         allele_freqs = [1-sum(self.aaf)] + self.aaf
         return 1 - sum(map(lambda x: x**2, allele_freqs))

From 1bd477a141a749c7c401f392681ce76f075ce10a Mon Sep 17 00:00:00 2001
From: mgymrek <mgymrek@mit.edu>
Date: Tue, 10 Dec 2013 19:59:54 -0500
Subject: [PATCH 077/168] fixed small typo in readme.rst for heterozygosity...

---
 README.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.rst b/README.rst
index eed7808..a60c0c8 100644
--- a/README.rst
+++ b/README.rst
@@ -66,7 +66,7 @@ examine properties of interest::
     >>> print record.num_hom_ref, record.num_het, record.num_hom_alt
     1 1 1
     >>> print record.nucl_diversity, record.aaf, record.heterozygosity
-    0.6 [0.5], 0.5
+    0.6 [0.5] 0.5
     >>> print record.get_hets()
     [Call(sample=NA00002, CallData(GT=1|0, GQ=48, DP=8, HQ=[51, 51]))]
     >>> print record.is_snp, record.is_indel, record.is_transition, record.is_deletion

From 9c3822d63f161cdeaad6c6ebfb0d359a8981736e Mon Sep 17 00:00:00 2001
From: bow <bow@bow.web.id>
Date: Sat, 11 Jan 2014 11:43:37 +0100
Subject: [PATCH 078/168] Ensure spurious line ending characters on records are
 stripped away

---
 vcf/parser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vcf/parser.py b/vcf/parser.py
index 8996d37..222f1a8 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -523,7 +523,7 @@ def _parse_alt(self, str):
 
     def next(self):
         '''Return the next record in the file.'''
-        line = self.reader.next()
+        line = self.reader.next().rstrip()
         row = re.split(self._separator, line)
         chrom = row[0]
         if self._prepend_chr:

From a60ef2f88da920f237a5565016626290facbfe32 Mon Sep 17 00:00:00 2001
From: bow <bow@bow.web.id>
Date: Sat, 11 Jan 2014 11:52:38 +0100
Subject: [PATCH 079/168] Fix so conversion to Py3 works

---
 vcf/parser.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vcf/parser.py b/vcf/parser.py
index 222f1a8..26948cb 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -523,8 +523,8 @@ def _parse_alt(self, str):
 
     def next(self):
         '''Return the next record in the file.'''
-        line = self.reader.next().rstrip()
-        row = re.split(self._separator, line)
+        line = self.reader.next()
+        row = re.split(self._separator, line.rstrip())
         chrom = row[0]
         if self._prepend_chr:
             chrom = 'chr' + chrom

From a06f583577b09a1af68933a0d6d4194d3ebcf161 Mon Sep 17 00:00:00 2001
From: Zhaorong Ma <azalea@zhaorongs-air.softgenetics.lan>
Date: Wed, 5 Feb 2014 11:11:37 -0500
Subject: [PATCH 080/168] Changed the default line ending in vcf.Writer() to
 '\n'.

---
 vcf/parser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vcf/parser.py b/vcf/parser.py
index 8996d37..8c56ab8 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -606,7 +606,7 @@ class Writer(object):
     # Reverse keys and values in header field count dictionary
     counts = dict((v,k) for k,v in field_counts.iteritems())
 
-    def __init__(self, stream, template, lineterminator="\r\n"):
+    def __init__(self, stream, template, lineterminator="\n"):
         self.writer = csv.writer(stream, delimiter="\t", lineterminator=lineterminator)
         self.template = template
         self.stream = stream

From 7c2710358ebd76c2bc00d2e7fe09889f0a6abbbe Mon Sep 17 00:00:00 2001
From: James Casbon <casbon@gmail.com>
Date: Thu, 6 Feb 2014 16:02:29 +0000
Subject: [PATCH 081/168] version 0.6.5

---
 docs/HISTORY.rst | 14 ++++++++++++++
 vcf/__init__.py  |  2 +-
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/docs/HISTORY.rst b/docs/HISTORY.rst
index 15aba83..2b03818 100644
--- a/docs/HISTORY.rst
+++ b/docs/HISTORY.rst
@@ -17,6 +17,20 @@ New features should have test code sent with them.
 Changes
 =======
 
+0.6.5 Release
+-------------
+
+* Better contig handling (#115, #116, #119 thanks Martijn)
+* INFO lines with type character (#120, #121 thanks @AndrewUzilov, Martijn)
+* Single breakends fix (#126 thanks @pkrushe)
+* Speedup by losing ordering of INFO (#128 thanks Martijn)
+* HOMSEQ and other missing fields in INFO (#130 thanks Martijn)
+* Add aaf property, (thanks @mgymrek #131)
+* Custom equality for walk_together, thanks bow #132
+* Change default line encoding to '\n'
+* Improved __eq__ (#134, thanks bow)
+
+
 0.6.4 Release
 -------------
 
diff --git a/vcf/__init__.py b/vcf/__init__.py
index 647106a..ec89aee 100644
--- a/vcf/__init__.py
+++ b/vcf/__init__.py
@@ -177,4 +177,4 @@
 from vcf.filters import Base as Filter
 from vcf.parser import RESERVED_INFO, RESERVED_FORMAT
 
-VERSION = '0.6.4'
+VERSION = '0.6.5'

From 616f31093ef63b3edc0bf059e89324485dfc19b0 Mon Sep 17 00:00:00 2001
From: datagram <datagram@users.noreply.github.com>
Date: Thu, 6 Feb 2014 01:08:21 -0800
Subject: [PATCH 082/168] Fix for issue #140, add vcf_record_sort_key arg

- Added 'vcf_record_sort_key' to allow user to specify arbitrary chromosome ordering.
- Fixed issue #140 by making sure to emit all records from the current chromosome before moving on to the next one. This takes care of the problem in most typical cases (eg. when all files have records for all contigs), but not in some edge cases, in which case the 'vcf_record_sort_key' arg can be used to fully solve the problem by explicitly defining the chromosome order.
---
 vcf/utils.py | 85 ++++++++++++++++++++++++++--------------------------
 1 file changed, 42 insertions(+), 43 deletions(-)

diff --git a/vcf/utils.py b/vcf/utils.py
index 0ab04ca..c4dea1a 100644
--- a/vcf/utils.py
+++ b/vcf/utils.py
@@ -2,51 +2,50 @@
 Utilities for VCF files.
 """
 
-import operator
+def walk_together(*readers, **kwargs):	
+	""" Simultaneously iteratate two or more VCF readers and return
+		lists of concurrent records from each
+		reader, with None if no record present.  Caller must check the
+		inputs are sorted in the same way and use the same reference
+		otherwise behaviour is undefined.
+		
+		Args:
+			vcf_record_sort_key: function that takes a VCF record and returns a tuple that can be used as the key for comparing and sorting VCF records across all given VCFReaders. The tuple's 1st element should be the contig name.
+	"""
+	if 'vcf_record_sort_key' in kwargs:
+		get_key = kwargs['vcf_record_sort_key']
+	else:
+		get_key = lambda r: (r.CHROM, r.POS)
+	
+	nexts = []
+	for reader in readers:
+		try:
+			nexts.append(reader.next())
+		except StopIteration:
+			nexts.append(None)
 
+	min_k = (None,)   # keep track of the previous min key's contig
+	while True:
+		kdict = {i: get_key(x) for i,x in enumerate(nexts) if x is not None}
+		keys_with_prev_contig = [k for k in kdict.values() if k[0] == min_k[0]]
+		if any(keys_with_prev_contig):
+			# finish all records from previous contig	
+			min_k = min(keys_with_prev_contig) 
+		else:			
+			# move on to the next contig
+			min_k = min(kdict.values())  
+		
+		min_k_idxs = set([i for i, k in kdict.items() if k == min_k])
+		yield [nexts[i] if i in min_k_idxs else None for i in range(len(nexts))]
 
-def walk_together(*readers, **kwargs):
-    """ Simultaneously iteratate two or more VCF readers and return
-        lists of concurrent records from each
-        reader, with None if no record present.  Caller must check the
-        inputs are sorted in the same way and use the same reference
-        otherwise behaviour is undefined.
-    """
-    # if defined, custom equality functions must take the same arguments
-    # as operator.eq
-    if 'eq_func' in kwargs:
-        eq_func = kwargs['eq_func']
-    # by default, we use the equality operator (==), which compares
-    # equality in CHROM, POS, REF, and ALT
-    else:
-        eq_func = operator.eq
-
-    # if one of the VCFs has no records, StopIteration is
-    # raised immediately, so we need to check for that and
-    # deal appropriately
-    nexts = []
-    for reader in readers:
-        try:
-            nexts.append(reader.next())
-        except StopIteration:
-            nexts.append(None)
-
-    while True:
-        min_next = min([x for x in nexts if x is not None])
-
-        yield [x if x is None or eq_func(x, min_next) else None for x in nexts]
-
-        # update nexts that we just yielded
-        for i, n in enumerate(nexts):
-
-            if n is not None and eq_func(n, min_next):
-                try:
-                    nexts[i] = readers[i].next()
-                except StopIteration:
-                    nexts[i] = None
-
-        if all([x is None for x in nexts]):
-            break
+		for i in min_k_idxs:
+			try:
+				nexts[i] = readers[i].next()
+			except StopIteration:
+				nexts[i] = None
+				
+		if all([x is None for x in nexts]):
+			break
 
 
 def trim_common_suffix(*sequences):

From 2de70ce85f1fb15511ee4f5302ee13224cd1b0eb Mon Sep 17 00:00:00 2001
From: Ben Weisburd <weisbbe1@skywalker-1-15.local>
Date: Thu, 6 Feb 2014 13:35:25 -0800
Subject: [PATCH 083/168] Fixed spacing and wrapping in utils.py, removed test
 for old walk_together arg (eq function), fixed edge case in _AltRecord

---
 vcf/model.py         |  4 +-
 vcf/test/test_vcf.py | 72 +++++++++++++++++-----------------
 vcf/utils.py         | 93 ++++++++++++++++++++++++--------------------
 3 files changed, 89 insertions(+), 80 deletions(-)

diff --git a/vcf/model.py b/vcf/model.py
index 21eabc5..266b941 100644
--- a/vcf/model.py
+++ b/vcf/model.py
@@ -465,9 +465,7 @@ def __str__(self):
         raise NotImplementedError
 
     def __eq__(self, other):
-        if not isinstance(other, self.__class__):
-            return False
-        return self.type == other.type
+        return self.type == getattr(other, 'type', None)
 
 
 class _Substitution(_AltRecord):
diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index dcc54f4..9853ced 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -20,7 +20,7 @@ class TestVcfSpecs(unittest.TestCase):
 
     def test_vcf_4_0(self):
         reader = vcf.Reader(fh('example-4.0.vcf'))
-        assert reader.metadata['fileformat'] == 'VCFv4.0'
+        self.assertEqual(reader.metadata['fileformat'], 'VCFv4.0')
 
         # test we can walk the file at least
         for r in reader:
@@ -81,21 +81,21 @@ def test_vcf_4_1_bnd(self):
             for a in r.ALT:
                 print(a)
             if r.ID == "bnd1":
-                    assert len(r.ALT) == 1
-                    assert r.ALT[0].type == "BND"
-                    assert r.ALT[0].chr == "2"
-                    assert r.ALT[0].pos == 3
-                    assert r.ALT[0].orientation == False
-                    assert r.ALT[0].remoteOrientation == True
-                    assert r.ALT[0].connectingSequence == "T"
+                    self.assertEqual(len(r.ALT), 1)
+                    self.assertEqual(r.ALT[0].type, "BND")
+                    self.assertEqual(r.ALT[0].chr, "2")
+                    self.assertEqual(r.ALT[0].pos, 3)
+                    self.assertEqual(r.ALT[0].orientation, False)
+                    self.assertEqual(r.ALT[0].remoteOrientation, True)
+                    self.assertEqual(r.ALT[0].connectingSequence, "T")
             if r.ID == "bnd4":
-                    assert len(r.ALT) == 1
-                    assert r.ALT[0].type == "BND"
-                    assert r.ALT[0].chr == "1"
-                    assert r.ALT[0].pos == 2
-                    assert r.ALT[0].orientation == True
-                    assert r.ALT[0].remoteOrientation == False
-                    assert r.ALT[0].connectingSequence == "G"
+                    self.assertEqual(len(r.ALT), 1)
+                    self.assertEqual(r.ALT[0].type, "BND")
+                    self.assertEqual(r.ALT[0].chr, "1")
+                    self.assertEqual(r.ALT[0].pos, 2)
+                    self.assertEqual(r.ALT[0].orientation, True)
+                    self.assertEqual(r.ALT[0].remoteOrientation, False)
+                    self.assertEqual(r.ALT[0].connectingSequence, "G")
             for c in r:
                 print(c)
                 assert c
@@ -165,7 +165,7 @@ def testParse(self):
             n+=1
             for x in r:
                 assert x
-        assert n == self.n_calls
+        self.assertEqual(n, self.n_calls)
 
 class TestSamtoolsOutput(unittest.TestCase):
 
@@ -748,7 +748,7 @@ def test_info_multiple_values(self):
     def test_pickle(self):
         reader = vcf.Reader(fh('example-4.0.vcf'))
         for var in reader:
-            assert cPickle.loads(cPickle.dumps(var)) == var
+            self.assertEqual(cPickle.loads(cPickle.dumps(var)), var)
 
 
 class TestCall(unittest.TestCase):
@@ -836,7 +836,7 @@ def testFetchSite(self):
         if not self.run:
             return
         site = self.reader.fetch('20', 14370)
-        assert site.POS == 14370
+        self.assertEqual(site.POS, 14370)
 
         site = self.reader.fetch('20', 14369)
         assert site is None
@@ -878,7 +878,7 @@ def testApplyFilter(self):
         return
         s, out = commands.getstatusoutput('python scripts/vcf_filter.py --site-quality 30 test/example-4.0.vcf sq')
         #print(out)
-        assert s == 0
+        self.assertEqual(s, 0)
         buf = StringIO()
         buf.write(out)
         buf.seek(0)
@@ -900,7 +900,7 @@ def testApplyFilter(self):
                 n += 1
             else:
                 assert 'sq30' not in r.FILTER
-        assert n == 2
+        self.assertEqual(n, 2)
 
 
     def testApplyMultipleFilters(self):
@@ -908,7 +908,7 @@ def testApplyMultipleFilters(self):
         return
         s, out = commands.getstatusoutput('python scripts/vcf_filter.py --site-quality 30 '
         '--genotype-quality 50 test/example-4.0.vcf sq mgq')
-        assert s == 0
+        self.assertEqual(s, 0)
         #print(out)
         buf = StringIO()
         buf.write(out)
@@ -954,10 +954,11 @@ def test_walk(self):
 
         n = 0
         for x in utils.walk_together(reader1, reader2, reader3):
-            assert len(x) == 3
-            assert (x[0] == x[1]) and (x[1] == x[2])
+            self.assertEqual(len(x), 3)
+            self.assertEqual(x[0], x[1])
+            self.assertEqual(x[1], x[2])
             n+= 1
-        assert n == 5
+        self.assertEqual(n, 5)
 
         # artificial case 2 from the left, 2 from the right, 2 together, 1 from the right, 1 from the left
 
@@ -1014,17 +1015,18 @@ def custom_eq(rec1, rec2):
 
         ndist_cust, nover_cust = 0, 0
         for x in utils.walk_together(reader1, reader2, eq_func=custom_eq):
-            assert len(x) == 2
+            self.assertEqual(len(x), 2)
+            # avoid assert() when one record is None
             if x[0] is not None and x[1] is not None:
                 assert (custom_eq(x[0], x[1]) and custom_eq(x[1], x[0]))
-                nover_cust += 1
-            ndist_cust += 1
-        assert nover_cust == 4
-        assert ndist_cust == 5
-
-        # final check just to be absolutely sure
-        assert ndist_def != ndist_cust
-        assert nover_def != nover_cust
+                ncomps += 1
+            # still increment counter to ensure iteration is finished for all
+            # records
+            nrecs += 1
+        # check number of records total
+        self.assertEqual(nrecs, 5)
+        # check how many records found in all files
+        self.assertEqual(ncomps, 4)
 
     def test_trim(self):
         tests = [('TAA GAA', 'T G'),
@@ -1045,8 +1047,8 @@ def test_meta(self):
         # expect no exceptions raised
         reader = vcf.Reader(fh('gatk_26_meta.vcf'))
         assert 'GATKCommandLine' in reader.metadata
-        assert reader.metadata['GATKCommandLine'][0]['CommandLineOptions'] == '"analysis_type=LeftAlignAndTrimVariants"'
-        assert reader.metadata['GATKCommandLine'][1]['CommandLineOptions'] == '"analysis_type=VariantAnnotator annotation=[HomopolymerRun, VariantType, TandemRepeatAnnotator]"'
+        self.assertEqual(reader.metadata['GATKCommandLine'][0]['CommandLineOptions'], '"analysis_type=LeftAlignAndTrimVariants"')
+        self.assertEqual(reader.metadata['GATKCommandLine'][1]['CommandLineOptions'], '"analysis_type=VariantAnnotator annotation=[HomopolymerRun, VariantType, TandemRepeatAnnotator]"')
 
 
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestVcfSpecs))
diff --git a/vcf/utils.py b/vcf/utils.py
index c4dea1a..09a6668 100644
--- a/vcf/utils.py
+++ b/vcf/utils.py
@@ -2,50 +2,59 @@
 Utilities for VCF files.
 """
 
-def walk_together(*readers, **kwargs):	
-	""" Simultaneously iteratate two or more VCF readers and return
-		lists of concurrent records from each
-		reader, with None if no record present.  Caller must check the
-		inputs are sorted in the same way and use the same reference
-		otherwise behaviour is undefined.
-		
-		Args:
-			vcf_record_sort_key: function that takes a VCF record and returns a tuple that can be used as the key for comparing and sorting VCF records across all given VCFReaders. The tuple's 1st element should be the contig name.
-	"""
-	if 'vcf_record_sort_key' in kwargs:
-		get_key = kwargs['vcf_record_sort_key']
-	else:
-		get_key = lambda r: (r.CHROM, r.POS)
-	
-	nexts = []
-	for reader in readers:
-		try:
-			nexts.append(reader.next())
-		except StopIteration:
-			nexts.append(None)
+def walk_together(*readers, **kwargs):
+    """
+    Simultaneously iteratate over two or more VCF readers. For each 
+    genomic position with a variant, return a list of size equal to the number 
+    of VCF readers. This list contains the VCF record from readers that have
+    this variant, and None for readers that don't have it. 
+    The caller must make sure that inputs are sorted in the same way and use the 
+    same reference otherwise behaviour is undefined.
+
+    Args:
+        vcf_record_sort_key: function that takes a VCF record and returns a 
+            tuple that can be used as a key for comparing and sorting VCF 
+            records across all readers. This tuple defines what it means for two 
+            variants to be equal (eg. whether it's only their position or also 
+            their allele values), and implicitly determines the chromosome 
+            ordering since the tuple's 1st element is typically the chromosome 
+            name (or calculated from it).
+    """
+    if 'vcf_record_sort_key' in kwargs:
+        get_key = kwargs['vcf_record_sort_key']
+    else:
+        get_key = lambda r: (r.CHROM, r.POS) #, r.REF, r.ALT)
+
+    nexts = []
+    for reader in readers:
+        try:
+            nexts.append(reader.next())
+        except StopIteration:
+            nexts.append(None)
+
+    min_k = (None,)   # keep track of the previous min key's contig
+    while True:
+        next_idx_to_k = dict(
+            (i, get_key(r)) for i, r in enumerate(nexts) if r is not None)
+        keys_with_prev_contig = [
+            k for k in next_idx_to_k.values() if k[0] == min_k[0]]
+
+        if any(keys_with_prev_contig):
+            min_k = min(keys_with_prev_contig)   # finish previous contig
+        else:
+            min_k = min(next_idx_to_k.values())   # move on to next contig
 
-	min_k = (None,)   # keep track of the previous min key's contig
-	while True:
-		kdict = {i: get_key(x) for i,x in enumerate(nexts) if x is not None}
-		keys_with_prev_contig = [k for k in kdict.values() if k[0] == min_k[0]]
-		if any(keys_with_prev_contig):
-			# finish all records from previous contig	
-			min_k = min(keys_with_prev_contig) 
-		else:			
-			# move on to the next contig
-			min_k = min(kdict.values())  
-		
-		min_k_idxs = set([i for i, k in kdict.items() if k == min_k])
-		yield [nexts[i] if i in min_k_idxs else None for i in range(len(nexts))]
+        min_k_idxs = set([i for i, k in next_idx_to_k.items() if k == min_k])
+        yield [nexts[i] if i in min_k_idxs else None for i in range(len(nexts))]
 
-		for i in min_k_idxs:
-			try:
-				nexts[i] = readers[i].next()
-			except StopIteration:
-				nexts[i] = None
-				
-		if all([x is None for x in nexts]):
-			break
+        for i in min_k_idxs:
+            try:
+                nexts[i] = readers[i].next()
+            except StopIteration:
+                nexts[i] = None
+                
+        if all([r is None for r in nexts]):
+            break
 
 
 def trim_common_suffix(*sequences):

From d7563dc80a1ef645e1c22a860183ec3472c835ec Mon Sep 17 00:00:00 2001
From: Ben Weisburd <weisbbe1@skywalker-1-15.local>
Date: Thu, 6 Feb 2014 14:16:19 -0800
Subject: [PATCH 084/168] Fixed edge case where all inputs are empty,
 simplified logic

---
 vcf/utils.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/vcf/utils.py b/vcf/utils.py
index 09a6668..456e5fa 100644
--- a/vcf/utils.py
+++ b/vcf/utils.py
@@ -33,7 +33,7 @@ def walk_together(*readers, **kwargs):
             nexts.append(None)
 
     min_k = (None,)   # keep track of the previous min key's contig
-    while True:
+    while any([r is not None for r in nexts]):
         next_idx_to_k = dict(
             (i, get_key(r)) for i, r in enumerate(nexts) if r is not None)
         keys_with_prev_contig = [
@@ -52,9 +52,6 @@ def walk_together(*readers, **kwargs):
                 nexts[i] = readers[i].next()
             except StopIteration:
                 nexts[i] = None
-                
-        if all([r is None for r in nexts]):
-            break
 
 
 def trim_common_suffix(*sequences):

From ce4d20f1b92fb9a3c5d9e21f6b04bf3bde566f94 Mon Sep 17 00:00:00 2001
From: datagram <weisbbe1@skywalker-1-15.local>
Date: Fri, 7 Feb 2014 03:20:20 -0800
Subject: [PATCH 085/168] finished fixing edge case where 'other' is None

---
 vcf/model.py | 36 +++++++++++++++---------------------
 1 file changed, 15 insertions(+), 21 deletions(-)

diff --git a/vcf/model.py b/vcf/model.py
index 266b941..c6e8f42 100644
--- a/vcf/model.py
+++ b/vcf/model.py
@@ -36,11 +36,9 @@ def __eq__(self, other):
         """ Two _Calls are equal if their _Records are equal
             and the samples and ``gt_type``s are the same
         """
-        if not isinstance(other, self.__class__):
-            return False
-        return (self.site == other.site
-                and self.sample == other.sample
-                and self.gt_type == other.gt_type)
+        return (self.site == getattr(other, "site", None)
+                and self.sample == getattr(other, "sample", None)
+                and self.gt_type == getattr(other, "gt_type", None))
 
     def __getstate__(self):
         return dict((attr, getattr(self, attr)) for attr in self.__slots__)
@@ -152,23 +150,19 @@ def __init__(self, CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO, FORMAT,
 
     # For Python 2
     def __cmp__(self, other):
-        return cmp((self.CHROM, self.POS), (other.CHROM, other.POS))
+        return cmp((self.CHROM, self.POS), (getattr(other, "CHROM", None), getattr(other, "POS", None)))
 
     # For Python 3
     def __eq__(self, other):
         """ _Records are equal if they describe the same variant (same position, alleles) """
-        # a _Record is never equal with a non-Record
-        # do the check here to avoid AttributeError (i.e. None does not have CHROM)
-        if not isinstance(other, self.__class__):
-            return False
-        return (self.CHROM == other.CHROM and
-                self.POS == other.POS and
-                self.REF == other.REF and
-                self.ALT == other.ALT)
+        return (self.CHROM == getattr(other, "CHROM", None) and
+                self.POS == getattr(other, "POS", None) and
+                self.REF == getattr(other, "REF", None) and
+                self.ALT == getattr(other, "ALT", None))
 
     # For Python 3
     def __lt__(self, other):
-        return (self.CHROM, self.POS) < (other.CHROM, other.POS)
+        return (self.CHROM, self.POS) < (getattr(other, "CHROM", None), getattr(other, "POS", None))
 
     def __iter__(self):
         return iter(self.samples)
@@ -545,12 +539,12 @@ def __eq__(self, other):
         if not isinstance(other, self.__class__):
             return False
         return super(_Breakend, self).__eq__(other) \
-                and self.chr == other.chr \
-                and self.pos == other.pos \
-                and self.remoteOrientation == other.remoteOrientation \
-                and self.withinMainAssembly == other.withinMainAssembly \
-                and self.orientation == other.orientation \
-                and self.connectingSequence == other.connectingSequence
+                and self.chr == getattr(other, "chr", None) \
+                and self.pos == getattr(other, "pos", None) \
+                and self.remoteOrientation == getattr(other, "remoteOrientation", None) \
+                and self.withinMainAssembly == getattr(other, "withinMainAssembly", None) \
+                and self.orientation == getattr(other, "orientation", None) \
+                and self.connectingSequence == getattr(other, "connectingSequence", None)
 
 
 class _SingleBreakend(_Breakend):

From d51db2303913b725612a29ad07b34968e947cfa3 Mon Sep 17 00:00:00 2001
From: datagram <weisbbe1@skywalker-1-15.local>
Date: Fri, 7 Feb 2014 03:21:19 -0800
Subject: [PATCH 086/168] Test data for testing the fix for issue #140

---
 vcf/test/issue-140-file1.vcf | 35 +++++++++++++++++++++++++++++++++++
 vcf/test/issue-140-file2.vcf | 34 ++++++++++++++++++++++++++++++++++
 vcf/test/issue-140-file3.vcf | 25 +++++++++++++++++++++++++
 3 files changed, 94 insertions(+)
 create mode 100644 vcf/test/issue-140-file1.vcf
 create mode 100644 vcf/test/issue-140-file2.vcf
 create mode 100644 vcf/test/issue-140-file3.vcf

diff --git a/vcf/test/issue-140-file1.vcf b/vcf/test/issue-140-file1.vcf
new file mode 100644
index 0000000..8ee2de2
--- /dev/null
+++ b/vcf/test/issue-140-file1.vcf
@@ -0,0 +1,35 @@
+##fileformat=VCFv4.1
+##source=VarScan2
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total depth of quality bases">
+##INFO=<ID=SOMATIC,Number=0,Type=Flag,Description="Indicates if record is a somatic mutation">
+##INFO=<ID=SS,Number=1,Type=String,Description="Somatic status of variant (0=Reference,1=Germline,2=Somatic,3=LOH, or 5=Unknown)">
+##INFO=<ID=SSC,Number=1,Type=String,Description="Somatic score in Phred scale (0-255) derived from somatic p-value">
+##INFO=<ID=GPV,Number=1,Type=Float,Description="Fisher's Exact Test P-value of tumor+normal versus no variant for Germline calls">
+##INFO=<ID=SPV,Number=1,Type=Float,Description="Fisher's Exact Test P-value of tumor versus normal for Somatic/LOH calls">
+##FILTER=<ID=str10,Description="Less than 10% or more than 90% of variant supporting reads on one strand">
+##FILTER=<ID=indelError,Description="Likely artifact due to indel reads at this position">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=RD,Number=1,Type=Integer,Description="Depth of reference-supporting bases (reads1)">
+##FORMAT=<ID=AD,Number=1,Type=Integer,Description="Depth of variant-supporting bases (reads2)">
+##FORMAT=<ID=FREQ,Number=1,Type=String,Description="Variant allele frequency">
+##FORMAT=<ID=DP4,Number=1,Type=String,Description="Strand read counts: ref/fwd, ref/rev, var/fwd, var/rev">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NORMAL
+chr1	10	.	G	GGT	.	PASS	DP=91;SS=1;SSC=2;GPV=3.0109E-23;SPV=5.8324E-1	GT:GQ:DP:RD:AD:FREQ:DP4	0/1:.:36:13:22:62.86%:2,11,1,21
+chr1	20	.	GT	G	.	PASS	DP=77;SS=1;SSC=2;GPV=2.4504E-29;SPV=6.0772E-1	GT:GQ:DP:RD:AD:FREQ:DP4	1/1:.:28:5:22:81.48%:0,5,1,21
+chr2	30	.	AC	A	.	PASS	DP=22;SS=1;SSC=7;GPV=1.3117E-10;SPV=1.9481E-1	GT:GQ:DP:RD:AD:FREQ:DP4	1/1:.:10:2:8:80%:0,2,0,8
+chr2	40	.	AAAC	A	.	PASS	DP=42;SS=1;SSC=12;GPV=7.3092E-18;SPV=6.278E-2	GT:GQ:DP:RD:AD:FREQ:DP4	0/1:.:13:4:9:69.23%:4,0,9,0
+chr3	50	.	TC	T	.	PASS	DP=41;SS=1;SSC=2;GPV=9.8874E-23;SPV=5.3659E-1	GT:GQ:DP:RD:AD:FREQ:DP4	1/1:.:22:1:21:95.45%:1,0,15,6
+chr10	60	.	T	TTAA	.	PASS	DP=27;SS=1;SSC=2;GPV=1.4382E-14;SPV=5.5556E-1	GT:GQ:DP:RD:AD:FREQ:DP4	1/1:.:12:0:12:100%:0,0,0,12
+chr10	70	.	C	CTG	.	PASS	DP=40;SS=1;SSC=7;GPV=3.6006E-9;SPV=1.9922E-1	GT:GQ:DP:RD:AD:FREQ:DP4	0/1:.:10:6:4:40%:0,6,0,4
+chr11	80	.	AGTT	A	.	PASS	DP=86;SS=1;SSC=0;GPV=4.1554E-34;SPV=8.5795E-1	GT:GQ:DP:RD:AD:FREQ:DP4	1/1:.:32:4:28:87.5%:1,3,0,28
+chr11	90	.	GA	G	.	PASS	DP=41;SS=1;SSC=3;GPV=1.9197E-12;SPV=4.089E-1	GT:GQ:DP:RD:AD:FREQ:DP4	0/1:.:15:5:9:64.29%:1,4,0,9
+chr20	100	.	TTTTG	T	.	PASS	DP=23;SS=1;SSC=1;GPV=2.9149E-12;SPV=6.5217E-1	GT:GQ:DP:RD:AD:FREQ:DP4	1/1:.:8:0:8:100%:0,0,7,1
+chr20	110	.	GA	G	.	PASS	DP=83;SS=1;SSC=13;GPV=1E0;SPV=4.0806E-2	GT:GQ:DP:RD:AD:FREQ:DP4	1/1:.:35:5:28:84.85%:4,1,12,16
+chrX	120	.	G	GA	.	PASS	DP=61;SS=1;SSC=1;GPV=1.6967E-25;SPV=7.0485E-1	GT:GQ:DP:RD:AD:FREQ:DP4	1/1:.:22:3:19:86.36%:0,3,1,18
+chrX	130	.	T	TAA	.	PASS	DP=19;SS=1;SSC=1;GPV=1.1285E-5;SPV=7.2172E-1	GT:GQ:DP:RD:AD:FREQ:DP4	0/1:.:8:2:5:71.43%:0,2,0,5
+chrY	140	.	G	GTTT	.	PASS	DP=62;SS=1;SSC=0;GPV=3.4914E-15;SPV=9.571E-1	GT:GQ:DP:RD:AD:FREQ:DP4	0/1:.:36:2:19:90.48%:1,1,15,4
+chrY	150	.	T	TGAAG	.	PASS	DP=28;SS=1;SSC=12;GPV=1.7583E-10;SPV=5.5797E-2	GT:GQ:DP:RD:AD:FREQ:DP4	0/1:.:13:5:8:61.54%:4,1,2,6
+chrM	160	.	G	GTTT	.	PASS	DP=62;SS=1;SSC=0;GPV=3.4914E-15;SPV=9.571E-1	GT:GQ:DP:RD:AD:FREQ:DP4	0/1:.:36:2:19:90.48%:1,1,15,4
+chrM	170	.	T	TGAAG	.	PASS	DP=28;SS=1;SSC=12;GPV=1.7583E-10;SPV=5.5797E-2	GT:GQ:DP:RD:AD:FREQ:DP4	0/1:.:13:5:8:61.54%:4,1,2,6
diff --git a/vcf/test/issue-140-file2.vcf b/vcf/test/issue-140-file2.vcf
new file mode 100644
index 0000000..7852133
--- /dev/null
+++ b/vcf/test/issue-140-file2.vcf
@@ -0,0 +1,34 @@
+##fileformat=VCFv4.1
+##source=VarScan2
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total depth of quality bases">
+##INFO=<ID=SOMATIC,Number=0,Type=Flag,Description="Indicates if record is a somatic mutation">
+##INFO=<ID=SS,Number=1,Type=String,Description="Somatic status of variant (0=Reference,1=Germline,2=Somatic,3=LOH, or 5=Unknown)">
+##INFO=<ID=SSC,Number=1,Type=String,Description="Somatic score in Phred scale (0-255) derived from somatic p-value">
+##INFO=<ID=GPV,Number=1,Type=Float,Description="Fisher's Exact Test P-value of tumor+normal versus no variant for Germline calls">
+##INFO=<ID=SPV,Number=1,Type=Float,Description="Fisher's Exact Test P-value of tumor versus normal for Somatic/LOH calls">
+##FILTER=<ID=str10,Description="Less than 10% or more than 90% of variant supporting reads on one strand">
+##FILTER=<ID=indelError,Description="Likely artifact due to indel reads at this position">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=RD,Number=1,Type=Integer,Description="Depth of reference-supporting bases (reads1)">
+##FORMAT=<ID=AD,Number=1,Type=Integer,Description="Depth of variant-supporting bases (reads2)">
+##FORMAT=<ID=FREQ,Number=1,Type=String,Description="Variant allele frequency">
+##FORMAT=<ID=DP4,Number=1,Type=String,Description="Strand read counts: ref/fwd, ref/rev, var/fwd, var/rev">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NORMAL
+chr1	10	.	G	GGT	.	PASS	DP=91;SS=1;SSC=2;GPV=3.0109E-23;SPV=5.8324E-1	GT:GQ:DP:RD:AD:FREQ:DP4	0/1:.:36:13:22:62.86%:2,11,1,21
+chr1	20	.	GT	G	.	PASS	DP=77;SS=1;SSC=2;GPV=2.4504E-29;SPV=6.0772E-1	GT:GQ:DP:RD:AD:FREQ:DP4	1/1:.:28:5:22:81.48%:0,5,1,21
+chr2	30	.	AC	A	.	PASS	DP=22;SS=1;SSC=7;GPV=1.3117E-10;SPV=1.9481E-1	GT:GQ:DP:RD:AD:FREQ:DP4	1/1:.:10:2:8:80%:0,2,0,8
+chr2	41	.	AAAC	A	.	PASS	DP=42;SS=1;SSC=12;GPV=7.3092E-18;SPV=6.278E-2	GT:GQ:DP:RD:AD:FREQ:DP4	0/1:.:13:4:9:69.23%:4,0,9,0
+chr10	60	.	T	TTAA	.	PASS	DP=27;SS=1;SSC=2;GPV=1.4382E-14;SPV=5.5556E-1	GT:GQ:DP:RD:AD:FREQ:DP4	1/1:.:12:0:12:100%:0,0,0,12
+chr10	70	.	C	CTG	.	PASS	DP=40;SS=1;SSC=7;GPV=3.6006E-9;SPV=1.9922E-1	GT:GQ:DP:RD:AD:FREQ:DP4	0/1:.:10:6:4:40%:0,6,0,4
+chr11	80	.	AGTT	A	.	PASS	DP=86;SS=1;SSC=0;GPV=4.1554E-34;SPV=8.5795E-1	GT:GQ:DP:RD:AD:FREQ:DP4	1/1:.:32:4:28:87.5%:1,3,0,28
+chr11	91	.	GA	G	.	PASS	DP=41;SS=1;SSC=3;GPV=1.9197E-12;SPV=4.089E-1	GT:GQ:DP:RD:AD:FREQ:DP4	0/1:.:15:5:9:64.29%:1,4,0,9
+chr20	100	.	TTTTG	T	.	PASS	DP=23;SS=1;SSC=1;GPV=2.9149E-12;SPV=6.5217E-1	GT:GQ:DP:RD:AD:FREQ:DP4	1/1:.:8:0:8:100%:0,0,7,1
+chr20	110	.	GA	G	.	PASS	DP=83;SS=1;SSC=13;GPV=1E0;SPV=4.0806E-2	GT:GQ:DP:RD:AD:FREQ:DP4	1/1:.:35:5:28:84.85%:4,1,12,16
+chrX	120	.	G	GA	.	PASS	DP=61;SS=1;SSC=1;GPV=1.6967E-25;SPV=7.0485E-1	GT:GQ:DP:RD:AD:FREQ:DP4	1/1:.:22:3:19:86.36%:0,3,1,18
+chrX	130	.	T	TAA	.	PASS	DP=19;SS=1;SSC=1;GPV=1.1285E-5;SPV=7.2172E-1	GT:GQ:DP:RD:AD:FREQ:DP4	0/1:.:8:2:5:71.43%:0,2,0,5
+chrY	140	.	G	GTTT	.	PASS	DP=62;SS=1;SSC=0;GPV=3.4914E-15;SPV=9.571E-1	GT:GQ:DP:RD:AD:FREQ:DP4	0/1:.:36:2:19:90.48%:1,1,15,4
+chrY	149	.	T	TGAAG	.	PASS	DP=28;SS=1;SSC=12;GPV=1.7583E-10;SPV=5.5797E-2	GT:GQ:DP:RD:AD:FREQ:DP4	0/1:.:13:5:8:61.54%:4,1,2,6
+chrM	160	.	G	GTTT	.	PASS	DP=62;SS=1;SSC=0;GPV=3.4914E-15;SPV=9.571E-1	GT:GQ:DP:RD:AD:FREQ:DP4	0/1:.:36:2:19:90.48%:1,1,15,4
+chrM	170	.	T	TGAAG	.	PASS	DP=28;SS=1;SSC=12;GPV=1.7583E-10;SPV=5.5797E-2	GT:GQ:DP:RD:AD:FREQ:DP4	0/1:.:13:5:8:61.54%:4,1,2,6
diff --git a/vcf/test/issue-140-file3.vcf b/vcf/test/issue-140-file3.vcf
new file mode 100644
index 0000000..754f6b6
--- /dev/null
+++ b/vcf/test/issue-140-file3.vcf
@@ -0,0 +1,25 @@
+##fileformat=VCFv4.1
+##source=VarScan2
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total depth of quality bases">
+##INFO=<ID=SOMATIC,Number=0,Type=Flag,Description="Indicates if record is a somatic mutation">
+##INFO=<ID=SS,Number=1,Type=String,Description="Somatic status of variant (0=Reference,1=Germline,2=Somatic,3=LOH, or 5=Unknown)">
+##INFO=<ID=SSC,Number=1,Type=String,Description="Somatic score in Phred scale (0-255) derived from somatic p-value">
+##INFO=<ID=GPV,Number=1,Type=Float,Description="Fisher's Exact Test P-value of tumor+normal versus no variant for Germline calls">
+##INFO=<ID=SPV,Number=1,Type=Float,Description="Fisher's Exact Test P-value of tumor versus normal for Somatic/LOH calls">
+##FILTER=<ID=str10,Description="Less than 10% or more than 90% of variant supporting reads on one strand">
+##FILTER=<ID=indelError,Description="Likely artifact due to indel reads at this position">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=RD,Number=1,Type=Integer,Description="Depth of reference-supporting bases (reads1)">
+##FORMAT=<ID=AD,Number=1,Type=Integer,Description="Depth of variant-supporting bases (reads2)">
+##FORMAT=<ID=FREQ,Number=1,Type=String,Description="Variant allele frequency">
+##FORMAT=<ID=DP4,Number=1,Type=String,Description="Strand read counts: ref/fwd, ref/rev, var/fwd, var/rev">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NORMAL
+chr3	50	.	TC	T	.	PASS	DP=41;SS=1;SSC=2;GPV=9.8874E-23;SPV=5.3659E-1	GT:GQ:DP:RD:AD:FREQ:DP4	1/1:.:22:1:21:95.45%:1,0,15,6
+chr10	60	.	T	TTAA	.	PASS	DP=27;SS=1;SSC=2;GPV=1.4382E-14;SPV=5.5556E-1	GT:GQ:DP:RD:AD:FREQ:DP4	1/1:.:12:0:12:100%:0,0,0,12
+chr10	70	.	C	CTG	.	PASS	DP=40;SS=1;SSC=7;GPV=3.6006E-9;SPV=1.9922E-1	GT:GQ:DP:RD:AD:FREQ:DP4	0/1:.:10:6:4:40%:0,6,0,4
+chr11	80	.	AGTT	A	.	PASS	DP=86;SS=1;SSC=0;GPV=4.1554E-34;SPV=8.5795E-1	GT:GQ:DP:RD:AD:FREQ:DP4	1/1:.:32:4:28:87.5%:1,3,0,28
+chr11	90	.	GA	G	.	PASS	DP=41;SS=1;SSC=3;GPV=1.9197E-12;SPV=4.089E-1	GT:GQ:DP:RD:AD:FREQ:DP4	0/1:.:15:5:9:64.29%:1,4,0,9
+chr20	100	.	TTTTG	T	.	PASS	DP=23;SS=1;SSC=1;GPV=2.9149E-12;SPV=6.5217E-1	GT:GQ:DP:RD:AD:FREQ:DP4	1/1:.:8:0:8:100%:0,0,7,1
+chrX	120	.	G	GA	.	PASS	DP=61;SS=1;SSC=1;GPV=1.6967E-25;SPV=7.0485E-1	GT:GQ:DP:RD:AD:FREQ:DP4	1/1:.:22:3:19:86.36%:0,3,1,18

From 28dfe376695c8a2496f7cdc5e39187a85f5d5ed8 Mon Sep 17 00:00:00 2001
From: datagram <weisbbe1@skywalker-1-15.local>
Date: Fri, 7 Feb 2014 03:24:00 -0800
Subject: [PATCH 087/168] Added tests for walk_together with more complex
 inputs

---
 vcf/test/test_vcf.py | 65 ++++++++++----------------------------------
 1 file changed, 14 insertions(+), 51 deletions(-)

diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index 9853ced..efa633e 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -961,13 +961,11 @@ def test_walk(self):
         self.assertEqual(n, 5)
 
         # artificial case 2 from the left, 2 from the right, 2 together, 1 from the right, 1 from the left
-
         expected = 'llrrttrl'
         reader1 = vcf.Reader(fh('walk_left.vcf'))
         reader2 = vcf.Reader(fh('example-4.0.vcf'))
 
         for ex, recs in zip(expected, utils.walk_together(reader1, reader2)):
-
             if ex == 'l':
                 assert recs[0] is not None
                 assert recs[1] is None
@@ -978,55 +976,20 @@ def test_walk(self):
                 assert recs[0] is not None
                 assert recs[1] is not None
 
-        # case with working custom equality function
-
-        # without custom function, most records in these files
-        # are different since the default equality checks
-        # for ALT values
-
-        reader1 = vcf.Reader(fh('example-4.0.vcf'))
-        reader2 = vcf.Reader(fh('walk_refcall.vcf'))
-
-        # counters for distinct records and overlapping records
-        ndist_def, nover_def = 0, 0
-        for x in utils.walk_together(reader1, reader2):
-            assert len(x) == 2
-            if x[0] is not None and x[1] is not None:
-                assert (x[0] == x[1] and x[1] == x[0])
-                nover_def += 1
-            ndist_def += 1
-        # check how many overlapping records
-        assert nover_def == 1
-        # check how many distinct records
-        assert ndist_def == 8
-
-        # with custom function that does not check ALT,
-        # we see more overlaps and less distinct records
-
-        def custom_eq(rec1, rec2):
-            # check for equality only on CHROM, POS, and REF
-            if rec1 is None or rec2 is None:
-                return False
-            return rec1.CHROM == rec2.CHROM and rec1.POS == rec2.POS and \
-                    rec1.REF == rec2.REF
-
-        reader1 = vcf.Reader(fh('example-4.0.vcf'))
-        reader2 = vcf.Reader(fh('walk_refcall.vcf'))
-
-        ndist_cust, nover_cust = 0, 0
-        for x in utils.walk_together(reader1, reader2, eq_func=custom_eq):
-            self.assertEqual(len(x), 2)
-            # avoid assert() when one record is None
-            if x[0] is not None and x[1] is not None:
-                assert (custom_eq(x[0], x[1]) and custom_eq(x[1], x[0]))
-                ncomps += 1
-            # still increment counter to ensure iteration is finished for all
-            # records
-            nrecs += 1
-        # check number of records total
-        self.assertEqual(nrecs, 5)
-        # check how many records found in all files
-        self.assertEqual(ncomps, 4)
+        # test files with many chromosomes, set 'vcf_record_sort_key' to define chromosome order
+        chr_order = map(str, range(1, 30)) + ['X', 'Y', 'M']
+        get_key = lambda r: (chr_order.index(r.CHROM.replace('chr','')), r.POS)
+        reader1 = vcf.Reader(fh('issue-140-file1.vcf'))
+        reader2 = vcf.Reader(fh('issue-140-file2.vcf'))
+        reader3 = vcf.Reader(fh('issue-140-file3.vcf'))
+        expected = "66642577752767662466" # each char is an integer bit flag - like file permissions
+        for ex, recs in zip(expected, utils.walk_together(reader1, reader2, reader3, vcf_record_sort_key = get_key)):
+            ex = int(ex)
+            for i, flag in enumerate([0x4, 0x2, 0x1]):
+                if ex & flag:
+                     self.assertNotEqual(recs[i], None)
+                else:
+                     self.assertEqual(recs[i], None)
 
     def test_trim(self):
         tests = [('TAA GAA', 'T G'),

From 734daf4037727927600476decc79d6d929f3a3d4 Mon Sep 17 00:00:00 2001
From: James Casbon <casbon@gmail.com>
Date: Mon, 10 Feb 2014 10:55:56 +0000
Subject: [PATCH 088/168] bump version

---
 docs/HISTORY.rst | 5 +++++
 vcf/__init__.py  | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/docs/HISTORY.rst b/docs/HISTORY.rst
index 2b03818..723e841 100644
--- a/docs/HISTORY.rst
+++ b/docs/HISTORY.rst
@@ -17,6 +17,11 @@ New features should have test code sent with them.
 Changes
 =======
 
+0.6.6 Release
+-------------
+
+* better walk together record ordering (Thanks @datagram, #141)
+
 0.6.5 Release
 -------------
 
diff --git a/vcf/__init__.py b/vcf/__init__.py
index ec89aee..acfb87a 100644
--- a/vcf/__init__.py
+++ b/vcf/__init__.py
@@ -177,4 +177,4 @@
 from vcf.filters import Base as Filter
 from vcf.parser import RESERVED_INFO, RESERVED_FORMAT
 
-VERSION = '0.6.5'
+VERSION = '0.6.6'

From d1a9fdc56a9e52e798a7b00315dd5de01e6d8e99 Mon Sep 17 00:00:00 2001
From: James Casbon <casbon@gmail.com>
Date: Fri, 21 Feb 2014 10:17:45 +0000
Subject: [PATCH 089/168] fix missing .pyx

---
 MANIFEST.in      | 1 +
 docs/HISTORY.rst | 5 +++++
 vcf/__init__.py  | 2 +-
 3 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 MANIFEST.in

diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..44f678a
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1 @@
+recursive-include vcf *.pyx
diff --git a/docs/HISTORY.rst b/docs/HISTORY.rst
index 723e841..defff0d 100644
--- a/docs/HISTORY.rst
+++ b/docs/HISTORY.rst
@@ -17,6 +17,11 @@ New features should have test code sent with them.
 Changes
 =======
 
+0.6.7 Release
+-------------
+
+* Include missing .pyx files 
+
 0.6.6 Release
 -------------
 
diff --git a/vcf/__init__.py b/vcf/__init__.py
index acfb87a..875e2d4 100644
--- a/vcf/__init__.py
+++ b/vcf/__init__.py
@@ -177,4 +177,4 @@
 from vcf.filters import Base as Filter
 from vcf.parser import RESERVED_INFO, RESERVED_FORMAT
 
-VERSION = '0.6.6'
+VERSION = '0.6.7'

From cbe8d906a3fd7491bbd6ce63a4f980a8e258b58d Mon Sep 17 00:00:00 2001
From: Lenna Peterson <arklenna@gmail.com>
Date: Sat, 22 Feb 2014 18:40:27 -0500
Subject: [PATCH 090/168] Restore subprocess import to test

---
 vcf/test/test_vcf.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index 3bd788b..c7526ed 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -5,6 +5,7 @@
 import commands
 import cPickle
 from StringIO import StringIO
+import subprocess
 
 import vcf
 from vcf import utils

From 097f2d0097bc7d5fb2b48a2d42514175ae72041f Mon Sep 17 00:00:00 2001
From: mgymrek <mgymrek@mit.edu>
Date: Thu, 6 Mar 2014 16:59:10 -0500
Subject: [PATCH 091/168] making alternate allele frequency work in the case of
 non-diploid alleles

---
 vcf/model.py                    |  7 ++++---
 vcf/test/example-4.1-ploidy.vcf | 20 ++++++++++++++++++++
 vcf/test/test_vcf.py            |  7 ++++++-
 3 files changed, 30 insertions(+), 4 deletions(-)
 create mode 100644 vcf/test/example-4.1-ploidy.vcf

diff --git a/vcf/model.py b/vcf/model.py
index c6e8f42..11c29d7 100644
--- a/vcf/model.py
+++ b/vcf/model.py
@@ -221,12 +221,13 @@ def aaf(self):
         """ A list of allele frequencies of alternate alleles.
            NOTE: Denominator calc'ed from _called_ genotypes.
         """
-        num_chroms = 2.0 * self.num_called
+        num_chroms = 0.0
         allele_counts = Counter()
         for s in self.samples:
             if s.gt_type is not None:
-                allele_counts.update([s.gt_alleles[0]])
-                allele_counts.update([s.gt_alleles[1]])
+                for a in s.gt_alleles:
+                    allele_counts.update([a])
+                    num_chroms += 1
         return [allele_counts[str(i)]/num_chroms for i in range(1, len(self.ALT)+1)]
 
     @property
diff --git a/vcf/test/example-4.1-ploidy.vcf b/vcf/test/example-4.1-ploidy.vcf
new file mode 100644
index 0000000..464c017
--- /dev/null
+++ b/vcf/test/example-4.1-ploidy.vcf
@@ -0,0 +1,20 @@
+##fileformat=VCFv4.1
+##fileDate=20090805
+##source=myImputationProgramV3.1
+##reference=file:///seq/references/1000GenomesPilot-NCBI36.fasta
+##contig=<ID=X,length=155270560,assembly=B36,species="Homo sapiens",taxonomy=x>
+##phasing=partial
+##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency">
+##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele">
+##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP membership, build 129">
+##INFO=<ID=H2,Number=0,Type=Flag,Description="HapMap2 membership">
+##FILTER=<ID=q10,Description="Quality below 10">
+##FILTER=<ID=s50,Description="Less than 50% of samples have data">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NA00001	NA00002	NA00003
+X	60034	rs186434315	T	A	100	PASS	NS=3;DP=14;AF=0.5;DB;H2	GT:GQ:DP:HQ	0:48:1:51,51	1|0:48:8:51,51	1/1/1:43:5:.,.
\ No newline at end of file
diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index c7526ed..ec9ecff 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -458,8 +458,13 @@ def test_aaf(self):
                 self.assertEqual([2.0/6.0, 4.0/6.0], aaf)
             if var.POS == 1230237:
                 self.assertEqual([0.0/6.0], aaf)
-            elif var.POS == 1234567:
+            if var.POS == 1234567:
                 self.assertEqual([2.0/4.0, 1.0/4.0], aaf)
+        reader = vcf.Reader(fh('example-4.1-ploidy.vcf'))
+        for var in reader:
+            aaf = var.aaf
+            if var.POS == 60034:
+                self.assertEqual([4.0/6.0], aaf)
 
     def test_pi(self):
         reader = vcf.Reader(fh('example-4.0.vcf'))

From 9a51b243b40a415941badc4e128d629b21e53a95 Mon Sep 17 00:00:00 2001
From: mgymrek <mgymrek@mit.edu>
Date: Thu, 6 Mar 2014 17:02:48 -0500
Subject: [PATCH 092/168] fixing small typo in elif in test case for aaf

---
 vcf/test/test_vcf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index ec9ecff..80a901d 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -458,7 +458,7 @@ def test_aaf(self):
                 self.assertEqual([2.0/6.0, 4.0/6.0], aaf)
             if var.POS == 1230237:
                 self.assertEqual([0.0/6.0], aaf)
-            if var.POS == 1234567:
+            elif var.POS == 1234567:
                 self.assertEqual([2.0/4.0, 1.0/4.0], aaf)
         reader = vcf.Reader(fh('example-4.1-ploidy.vcf'))
         for var in reader:

From 608078a5fabd9e5e3ee1680f2306141a952609a5 Mon Sep 17 00:00:00 2001
From: mgymrek <mgymrek@mit.edu>
Date: Thu, 6 Mar 2014 17:11:21 -0500
Subject: [PATCH 093/168] adding one more test case for non-diploids

---
 vcf/test/example-4.1-ploidy.vcf | 3 ++-
 vcf/test/test_vcf.py            | 2 ++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/vcf/test/example-4.1-ploidy.vcf b/vcf/test/example-4.1-ploidy.vcf
index 464c017..4b9f048 100644
--- a/vcf/test/example-4.1-ploidy.vcf
+++ b/vcf/test/example-4.1-ploidy.vcf
@@ -17,4 +17,5 @@
 ##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
 ##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality">
 #CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NA00001	NA00002	NA00003
-X	60034	rs186434315	T	A	100	PASS	NS=3;DP=14;AF=0.5;DB;H2	GT:GQ:DP:HQ	0:48:1:51,51	1|0:48:8:51,51	1/1/1:43:5:.,.
\ No newline at end of file
+X	60034	rs186434315	T	A	100	PASS	NS=3;DP=14;AF=0.5;DB;H2	GT:GQ:DP:HQ	0:48:1:51,51	1|0:48:8:51,51	1/1/1:43:5:.,.
+X	60378	rs185512268	C	A	100	PASS	NS=3;DP=14;AF=0.5;DB;H2	GT:GQ:DP:HQ	0:48:1:51,51	1|0:48:8:51,51	1/1/1:43:5:.,.
\ No newline at end of file
diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index 80a901d..947f554 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -465,6 +465,8 @@ def test_aaf(self):
             aaf = var.aaf
             if var.POS == 60034:
                 self.assertEqual([4.0/6.0], aaf)
+            elif var.POS == 60387:
+                self.assertEqual([1.0/3.0], aaf)
 
     def test_pi(self):
         reader = vcf.Reader(fh('example-4.0.vcf'))

From 4952f63c4866912f25a0d8f775697d1df3bb0e43 Mon Sep 17 00:00:00 2001
From: mgymrek <mgymrek@mit.edu>
Date: Thu, 6 Mar 2014 17:42:17 -0500
Subject: [PATCH 094/168] updating ploidy vcf example file

---
 vcf/test/example-4.1-ploidy.vcf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vcf/test/example-4.1-ploidy.vcf b/vcf/test/example-4.1-ploidy.vcf
index 4b9f048..6704048 100644
--- a/vcf/test/example-4.1-ploidy.vcf
+++ b/vcf/test/example-4.1-ploidy.vcf
@@ -18,4 +18,4 @@
 ##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality">
 #CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NA00001	NA00002	NA00003
 X	60034	rs186434315	T	A	100	PASS	NS=3;DP=14;AF=0.5;DB;H2	GT:GQ:DP:HQ	0:48:1:51,51	1|0:48:8:51,51	1/1/1:43:5:.,.
-X	60378	rs185512268	C	A	100	PASS	NS=3;DP=14;AF=0.5;DB;H2	GT:GQ:DP:HQ	0:48:1:51,51	1|0:48:8:51,51	1/1/1:43:5:.,.
\ No newline at end of file
+X	60378	rs185512268	C	A	100	PASS	NS=3;DP=14;AF=0.5;DB;H2	GT:GQ:DP:HQ	0:48:1:51,51	1:48:8:51,51	0:43:5:.,.
\ No newline at end of file

From eeb892cfbb294621a4b9ed1d7bc7caf6c3751b7f Mon Sep 17 00:00:00 2001
From: Chris Lasher <chris.lasher@gmail.com>
Date: Mon, 12 May 2014 16:45:37 -0700
Subject: [PATCH 095/168] Marks skipped tests as skipped, not passed.

Decorates tests that are potentially skipped, as well as broken tests
that are always skipped, as being skipped, rather than indicating
falsely that these tests have passed (the result of premature return
statements prior to any assertions in the tests).

This introduces another dependency for Python 2.6, the unittest2 module,
which back-ported this functionality from Python 2.7 and Python 3.
---
 .travis.yml          |  2 +-
 tox.ini              |  1 +
 vcf/test/test_vcf.py | 22 ++++++++++++++--------
 3 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index f54f5da..00b087c 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -7,7 +7,7 @@ python:
   - "3.3"
   - "pypy"
 install:
-  - "if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install --use-mirrors cython && pip install --use-mirrors pysam argparse counter ordereddict; fi"
+  - "if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install --use-mirrors cython && pip install --use-mirrors pysam argparse counter ordereddict unittest2; fi"
   - "if [[ $TRAVIS_PYTHON_VERSION == '2.7' ]]; then pip install --use-mirrors cython && pip install --use-mirrors pysam; fi"
   - python setup.py install
 script: python setup.py test
diff --git a/tox.ini b/tox.ini
index 0c07a88..d8e584d 100644
--- a/tox.ini
+++ b/tox.ini
@@ -18,6 +18,7 @@ deps =
     ordereddict
     cython
     pysam
+    unittest2
 
 [testenv:py27]
 deps =
diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index c7526ed..2ff8920 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -1,5 +1,9 @@
 from __future__ import print_function
 import unittest
+try:
+    unittest.skip
+except AttributeError:
+    import unittest2 as unittest
 import doctest
 import os
 import commands
@@ -7,6 +11,11 @@
 from StringIO import StringIO
 import subprocess
 
+try:
+    import pysam
+except ImportError:
+    pysam = None
+
 import vcf
 from vcf import utils
 
@@ -814,12 +823,9 @@ class TestTabix(unittest.TestCase):
     def setUp(self):
         self.reader = vcf.Reader(fh('tb.vcf.gz', 'rb'))
 
-        self.run = vcf.parser.pysam is not None
-
 
+    @unittest.skipUnless(pysam, "test requires installation of PySAM.")
     def testFetchRange(self):
-        if not self.run:
-            return
         lines = list(self.reader.fetch('20', 14370, 14370))
         self.assertEquals(len(lines), 1)
         self.assertEqual(lines[0].POS, 14370)
@@ -833,9 +839,9 @@ def testFetchRange(self):
         lines = list(self.reader.fetch('20', 1110695, 1234567))
         self.assertEquals(len(lines), 3)
 
+
+    @unittest.skipUnless(pysam, "test requires installation of PySAM.")
     def testFetchSite(self):
-        if not self.run:
-            return
         site = self.reader.fetch('20', 14370)
         self.assertEqual(site.POS, 14370)
 
@@ -920,9 +926,9 @@ def testSampleFilterModule(self):
 class TestFilter(unittest.TestCase):
 
 
+    @unittest.skip("test currently broken")
     def testApplyFilter(self):
         # FIXME: broken with distribute
-        return
         s, out = commands.getstatusoutput('python scripts/vcf_filter.py --site-quality 30 test/example-4.0.vcf sq')
         #print(out)
         self.assertEqual(s, 0)
@@ -950,9 +956,9 @@ def testApplyFilter(self):
         self.assertEqual(n, 2)
 
 
+    @unittest.skip("test currently broken")
     def testApplyMultipleFilters(self):
         # FIXME: broken with distribute
-        return
         s, out = commands.getstatusoutput('python scripts/vcf_filter.py --site-quality 30 '
         '--genotype-quality 50 test/example-4.0.vcf sq mgq')
         self.assertEqual(s, 0)

From f3d6a35abbf1dfc866cbc4ad35eb999949a9102a Mon Sep 17 00:00:00 2001
From: Chris Lasher <chris.lasher@gmail.com>
Date: Mon, 12 May 2014 17:01:55 -0700
Subject: [PATCH 096/168] Skips fragile tests broken for Python 3.

---
 vcf/test/test_vcf.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index 2ff8920..2eef51b 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -10,6 +10,7 @@
 import cPickle
 from StringIO import StringIO
 import subprocess
+import sys
 
 try:
     import pysam
@@ -19,6 +20,8 @@
 import vcf
 from vcf import utils
 
+IS_PYTHON2 = sys.version_info[0] == 2
+
 suite = doctest.DocTestSuite(vcf)
 
 
@@ -878,6 +881,7 @@ def testOpenFilenameGzipped(self):
 
 
 class TestSampleFilter(unittest.TestCase):
+    @unittest.skipUnless(IS_PYTHON2, "test broken for Python 3")
     def testCLIListSamples(self):
         proc = subprocess.Popen('python scripts/vcf_sample_filter.py vcf/test/example-4.1.vcf', shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
         out, err = proc.communicate()
@@ -886,6 +890,7 @@ def testCLIListSamples(self):
         expected_out = ['Samples:', '0: NA00001', '1: NA00002', '2: NA00003']
         self.assertEqual(out.splitlines(), expected_out)
 
+    @unittest.skipUnless(IS_PYTHON2, "test broken for Python 3")
     def testCLIWithFilter(self):
         proc = subprocess.Popen('python scripts/vcf_sample_filter.py vcf/test/example-4.1.vcf -f 1,2 --quiet', shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
         out, err = proc.communicate()

From 0e757e1ef3cccf33fda7d21b2390ecaba6eb59ac Mon Sep 17 00:00:00 2001
From: Chris Lasher <chris.lasher@gmail.com>
Date: Mon, 12 May 2014 17:45:06 -0700
Subject: [PATCH 097/168] Skips broken test for PyPy.

---
 vcf/test/test_vcf.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index 2eef51b..ab73b8a 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -21,6 +21,7 @@
 from vcf import utils
 
 IS_PYTHON2 = sys.version_info[0] == 2
+IS_NOT_PYPY = 'PyPy' not in sys.version
 
 suite = doctest.DocTestSuite(vcf)
 
@@ -906,6 +907,7 @@ def testCLIWithFilter(self):
         rec = reader.next()
         self.assertEqual(len(rec.samples), 1)
 
+    @unittest.skipUnless(IS_NOT_PYPY, "test broken for PyPy")
     def testSampleFilterModule(self):
         # init filter with filename, get list of samples
         filt = vcf.SampleFilter('vcf/test/example-4.1.vcf')

From 49be99b677e1fa72697f182a438802114f3b4b89 Mon Sep 17 00:00:00 2001
From: Chris Lasher <chris.lasher@gmail.com>
Date: Tue, 13 May 2014 14:30:50 -0700
Subject: [PATCH 098/168] Decorate the TestTabix case rather than its tests.

---
 vcf/test/test_vcf.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index ab73b8a..85354bb 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -822,13 +822,13 @@ def test_gt_types(self):
                 self.assertEqual([None,1,2], gt_types)
 
 
+@unittest.skipUnless(pysam, "test requires installation of PySAM.")
 class TestTabix(unittest.TestCase):
 
     def setUp(self):
         self.reader = vcf.Reader(fh('tb.vcf.gz', 'rb'))
 
 
-    @unittest.skipUnless(pysam, "test requires installation of PySAM.")
     def testFetchRange(self):
         lines = list(self.reader.fetch('20', 14370, 14370))
         self.assertEquals(len(lines), 1)
@@ -844,7 +844,6 @@ def testFetchRange(self):
         self.assertEquals(len(lines), 3)
 
 
-    @unittest.skipUnless(pysam, "test requires installation of PySAM.")
     def testFetchSite(self):
         site = self.reader.fetch('20', 14370)
         self.assertEqual(site.POS, 14370)

From 4fba62c16e78cb158bd443a2a3857bc5a9b437ee Mon Sep 17 00:00:00 2001
From: Chris Lasher <chris.lasher@gmail.com>
Date: Tue, 13 May 2014 23:49:12 -0700
Subject: [PATCH 099/168] Reader.fetch uses zero-based, half-open coordinates.

These changes make the behavior of Reader.fetch consistent with
pysam.Tabixfile, which uses the zero-based, half-open coordinate system
for Tabixfile.fetch. See
http://www.cgat.org/~andreas/documentation/pysam/api.html#pysam.Tabixfile.fetch

Previously, PyVCF's Reader.fetch declared no particular coordinate
system. Since the method quietly deducted 1 from the start position,
apparently it assumed users were going to input a one-based coordinate
there. However, users familiar with pysam's Tabixfile for other formats
are surprised when variants ahead of the start coordinate are returned
by Reader.fetch.

As _Record.start and _Record.end are in the ZBHO coordinate system, it
adds to the consistency that fetch take start and end coordinates in
ZBHO, so the same _Record instance could be retrieved using its .CHROM,
.start, and .end coordinates.

This change also removes fetch's prior behavior of returning a single
_Record instance when given only chrom and start coordinates (it
implicitly did a Tabixfile.fetch(chrom, start-1, start)). The new
behavior when omitting the end parameter is to return an iterator of
_Record instances starting at start and continuing through the end of
the chromosome chrom. Again, this is the behavior consistent with
pysam.Tabixfile.fetch, and is what users ought to expect.

This change also allows the user to omit both the start and end
positions. In this case, an iterable of _Record instances for all
records for the particular chromosome chrom will be returned, which
again, is consistent with Tabixfile.fetch. This behavior also resolves
Issue #123 "Cannot fetch() whole chromosome".
---
 vcf/parser.py        | 46 ++++++++++++++++++++++++---------------
 vcf/test/test_vcf.py | 52 +++++++++++++++++++++++++++++---------------
 2 files changed, 64 insertions(+), 34 deletions(-)

diff --git a/vcf/parser.py b/vcf/parser.py
index de39353..39d1f8a 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -165,7 +165,7 @@ def read_format(self, format_string):
                        match.group('type'), match.group('desc'))
 
         return (match.group('id'), form)
-    
+
     def read_contig(self, contig_string):
         '''Read a meta-contigrmation INFO line.'''
         match = self.contig_pattern.match(contig_string)
@@ -321,7 +321,7 @@ def _parse_metainfo(self):
             elif line.startswith('##FORMAT'):
                 key, val = parser.read_format(line)
                 self.formats[key] = val
-            
+
             elif line.startswith('##contig'):
                 key, val = parser.read_contig(line)
                 self.contigs[key] = val
@@ -569,14 +569,36 @@ def next(self):
 
         return record
 
-    def fetch(self, chrom, start, end=None):
-        """ fetch records from a Tabix indexed VCF, requires pysam
-            if start and end are specified, return iterator over positions
-            if end not specified, return individual ``_Call`` at start or None
+    def fetch(self, chrom, start=None, end=None):
+        """ Fetches records from a tabix-indexed VCF file and returns an
+            iterable of ``_Record`` instances
+
+            chrom must be specified.
+
+            The start and end coordinates are in the zero-based,
+            half-open coordinate system, similar to ``_Record.start`` and
+            ``_Record.end``. The very first base of a chromosome is
+            index 0, and the the region includes bases up to, but not
+            including the base at the end coordinate. For example
+            ``fetch('4', 10, 20)`` would include all variants
+            overlapping a 10 base pair region from the 11th base of
+            through the 20th base (which is at index 19) of chromosome
+            4. It would not include the 21st base (at index 20). See
+            http://genomewiki.ucsc.edu/index.php/Coordinate_Transforms
+            for more information on the zero-based, half-open coordinate
+            system.
+
+            If end is omitted, all variants from start until the end of
+            the chromosome chrom will be included.
+
+            If start and end are omitted, all variants on chrom will be
+            returned.
+
+            requires pysam
+
         """
         if not pysam:
             raise Exception('pysam not available, try "pip install pysam"?')
-
         if not self.filename:
             raise Exception('Please provide a filename (or a "normal" fsock)')
 
@@ -586,16 +608,6 @@ def fetch(self, chrom, start, end=None):
         if self._prepend_chr and chrom[:3] == 'chr':
             chrom = chrom[3:]
 
-        # not sure why tabix needs position -1
-        start = start - 1
-
-        if end is None:
-            self.reader = self._tabix.fetch(chrom, start, start + 1)
-            try:
-                return self.next()
-            except StopIteration:
-                return None
-
         self.reader = self._tabix.fetch(chrom, start, end)
         return self
 
diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index 85354bb..e9d9756 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -823,35 +823,53 @@ def test_gt_types(self):
 
 
 @unittest.skipUnless(pysam, "test requires installation of PySAM.")
-class TestTabix(unittest.TestCase):
+class TestFetch(unittest.TestCase):
 
     def setUp(self):
         self.reader = vcf.Reader(fh('tb.vcf.gz', 'rb'))
 
 
-    def testFetchRange(self):
-        lines = list(self.reader.fetch('20', 14370, 14370))
-        self.assertEquals(len(lines), 1)
-        self.assertEqual(lines[0].POS, 14370)
+    def assertFetchedExpectedPositions(
+            self, fetched_variants, expected_positions):
+        fetched_positions = [var.POS for var in fetched_variants]
+        self.assertEqual(fetched_positions, expected_positions)
+
+
+    def testNoVariantsInRange(self):
+        fetched_variants = self.reader.fetch('20', 14370, 17329)
+        self.assertFetchedExpectedPositions(fetched_variants, [])
 
-        lines = list(self.reader.fetch('20', 14370, 17330))
-        self.assertEquals(len(lines), 2)
-        self.assertEqual(lines[0].POS, 14370)
-        self.assertEqual(lines[1].POS, 17330)
 
+    def testNoVariantsForZeroLengthInterval(self):
+        fetched_variants = self.reader.fetch('20', 14369, 14369)
+        self.assertFetchedExpectedPositions(fetched_variants, [])
+
+
+    def testFetchRange(self):
+        fetched_variants = self.reader.fetch('20', 14369, 14370)
+        self.assertFetchedExpectedPositions(fetched_variants, [14370])
 
-        lines = list(self.reader.fetch('20', 1110695, 1234567))
-        self.assertEquals(len(lines), 3)
+        fetched_variants = self.reader.fetch('20', 14369, 17330)
+        self.assertFetchedExpectedPositions(
+                fetched_variants, [14370, 17330])
 
+        fetched_variants = self.reader.fetch('20', 1110695, 1234567)
+        self.assertFetchedExpectedPositions(
+                fetched_variants, [1110696, 1230237, 1234567])
 
-    def testFetchSite(self):
-        site = self.reader.fetch('20', 14370)
-        self.assertEqual(site.POS, 14370)
 
-        site = self.reader.fetch('20', 14369)
-        assert site is None
+    def testFetchesFromStartIfStartOnlySpecified(self):
+        fetched_variants = self.reader.fetch('20', 1110695)
+        self.assertFetchedExpectedPositions(
+                fetched_variants, [1110696, 1230237, 1234567])
 
 
+    def testFetchesAllFromChromIfOnlyChromSpecified(self):
+        fetched_variants = self.reader.fetch('20')
+        self.assertFetchedExpectedPositions(
+                fetched_variants,
+                [14370, 17330, 1110696, 1230237, 1234567]
+        )
 
 
 class TestOpenMethods(unittest.TestCase):
@@ -1090,7 +1108,7 @@ def test_meta(self):
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestMixedFiltering))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestRecord))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestCall))
-suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestTabix))
+suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestFetch))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestOpenMethods))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestSampleFilter))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestFilter))

From 2e4498d882f96ba028a9e808c7ec043e28d0ce17 Mon Sep 17 00:00:00 2001
From: Chris Lasher <chris.lasher@gmail.com>
Date: Wed, 14 May 2014 09:23:18 -0700
Subject: [PATCH 100/168] Fixes fetch documentation in package docstring.

This corrects several lines that relate to the changes to fetch brought
in by Pull Request #156.
---
 vcf/__init__.py | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/vcf/__init__.py b/vcf/__init__.py
index d13ae33..149d25a 100644
--- a/vcf/__init__.py
+++ b/vcf/__init__.py
@@ -143,20 +143,32 @@
     >>> print bnd.withinMainAssembly, bnd.orientation, bnd.remoteOrientation, bnd.connectingSequence
     True False True T
 
-Random access is supported for files with tabix indexes.  Simply call fetch for the
-region you are interested in::
+The Reader supports retrieval of records within designated regions for
+files with tabix indexes via the fetch method. Pass in a chromosome,
+and, optionally, start and end coordinates, for the regions of
+interest::
 
     >>> vcf_reader = vcf.Reader(filename='vcf/test/tb.vcf.gz')
-    >>> for record in vcf_reader.fetch('20', 1110696, 1230237):  # doctest: +SKIP
+    >>> # fetch all records on chromosome 20 from base 1110696 through 1230237
+    >>> for record in vcf_reader.fetch('20', 1110695, 1230237):  # doctest: +SKIP
     ...     print record
     Record(CHROM=20, POS=1110696, REF=A, ALT=[G, T])
     Record(CHROM=20, POS=1230237, REF=T, ALT=[None])
 
-Or extract a single row::
+Note that the start and end coordinates are in the zero-based, half-open
+coordinate system, similar to ``_Record.start`` and ``_Record.end``. The
+very first base of a chromosome is index 0, and the the region includes
+bases up to, but not including the base at the end coordinate. For
+example::
 
-    >>> print vcf_reader.fetch('20', 1110696)  # doctest: +SKIP
-    Record(CHROM=20, POS=1110696, REF=A, ALT=[G, T])
+    >>> # fetch all records on chromosome 4 from base 11 through 20
+    >>> vcf_reader.fetch('4', 10, 20)   # doctest: +SKIP
 
+would include all records overlapping a 10 base pair region from the
+11th base of through the 20th base (which is at index 19) of chromosome
+4. It would not include the 21st base (at index 20). (See
+http://genomewiki.ucsc.edu/index.php/Coordinate_Transforms for more
+information on the zero-based, half-open coordinate system.)
 
 The ``Writer`` class provides a way of writing a VCF file.  Currently, you must specify a
 template ``Reader`` which provides the metadata::

From 2d522b5251bec4a3e6060ddb7d7aa6cde81e3887 Mon Sep 17 00:00:00 2001
From: Chris Lasher <chris.lasher@gmail.com>
Date: Wed, 14 May 2014 09:30:10 -0700
Subject: [PATCH 101/168] Removes setup import from distutils that overrides
 setuptools setup.

---
 setup.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/setup.py b/setup.py
index 321d69a..f063a53 100644
--- a/setup.py
+++ b/setup.py
@@ -1,5 +1,4 @@
 from setuptools import setup
-from distutils.core import setup
 from distutils.extension import Extension
 
 try:

From 2451c160f95ee5744d89e2ce18f4ce6556410c17 Mon Sep 17 00:00:00 2001
From: Chris Lasher <chris.lasher@gmail.com>
Date: Thu, 15 May 2014 14:02:49 -0700
Subject: [PATCH 102/168] Tidies up Python 2.6 dependencies

Moves all dependencies to the install_requires argument to setup.
---
 setup.py | 25 ++++++++-----------------
 1 file changed, 8 insertions(+), 17 deletions(-)

diff --git a/setup.py b/setup.py
index f063a53..e2e56c9 100644
--- a/setup.py
+++ b/setup.py
@@ -1,5 +1,6 @@
 from setuptools import setup
 from distutils.extension import Extension
+import sys
 
 try:
     from Cython.Distutils import build_ext
@@ -7,23 +8,14 @@
 except:
     CYTHON = False
 
-requires = []
+IS_PYTHON26 = sys.version_info[:2] == (2, 6)
 
-# python 2.6 does not have argparse
-try:
-    import argparse
-except ImportError:
-    requires.append('argparse')
+DEPENDENCIES = ['setuptools']
+
+if IS_PYTHON26:
+    DEPENDENCIES.extend(['argparse', 'counter', 'ordereddict',
+                         'unittest2'])
 
-import collections
-try:
-    collections.Counter
-except AttributeError:
-    requires.append('counter')
-try:
-    collections.OrderedDict
-except AttributeError:
-    requires.append('ordereddict')
 
 # get the version without an import
 VERSION = "Undefined"
@@ -53,8 +45,7 @@
     description='Variant Call Format (VCF) parser for Python',
     long_description=DOC,
     test_suite='vcf.test.test_vcf.suite',
-    install_requires=['distribute'],
-    requires=requires,
+    install_requires=DEPENDENCIES,
     entry_points = {
         'vcf.filters': [
             'site_quality = vcf.filters:SiteQuality',

From 606659010bf65a3e35e3f9ee5aa14b248a56899e Mon Sep 17 00:00:00 2001
From: Chris Lasher <chris.lasher@gmail.com>
Date: Thu, 15 May 2014 15:21:59 -0700
Subject: [PATCH 103/168] Updates PyPI trove classifiers.

---
 setup.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index e2e56c9..fb4e512 100644
--- a/setup.py
+++ b/setup.py
@@ -62,10 +62,18 @@
         'Development Status :: 4 - Beta',
         'Intended Audience :: Developers',
         'Intended Audience :: Science/Research',
+        'License :: OSI Approved :: BSD License',
+        'License :: OSI Approved :: MIT License',
         'Operating System :: OS Independent',
+        'Programming Language :: Cython',
         'Programming Language :: Python',
+        'Programming Language :: Python :: 2',
+        'Programming Language :: Python :: 2.6'
+        'Programming Language :: Python :: 2.7',
         'Programming Language :: Python :: 3',
-        'Topic :: Scientific/Engineering',
+        'Programming Language :: Python :: 3.2',
+        'Programming Language :: Python :: 3.3',
+        'Topic :: Scientific/Engineering :: Bio-Informatics',
       ],
     keywords='bioinformatics',
     use_2to3=True,

From 2ef6a4facd2b18a5419cc9512377d51f3d4e6360 Mon Sep 17 00:00:00 2001
From: Chris Lasher <chris.lasher@gmail.com>
Date: Thu, 15 May 2014 18:02:52 -0700
Subject: [PATCH 104/168] Use requirements files to consolidate dependencies.

---
 .travis.yml                             |  3 +--
 requirements/common-requirements.txt    |  3 +++
 requirements/pypy-requirements.txt      |  1 +
 requirements/python2.6-requirements.txt |  5 +++++
 tox.ini                                 | 22 +++++-----------------
 5 files changed, 15 insertions(+), 19 deletions(-)
 create mode 100644 requirements/common-requirements.txt
 create mode 100644 requirements/pypy-requirements.txt
 create mode 100644 requirements/python2.6-requirements.txt

diff --git a/.travis.yml b/.travis.yml
index 00b087c..1e1b142 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -7,7 +7,6 @@ python:
   - "3.3"
   - "pypy"
 install:
-  - "if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install --use-mirrors cython && pip install --use-mirrors pysam argparse counter ordereddict unittest2; fi"
-  - "if [[ $TRAVIS_PYTHON_VERSION == '2.7' ]]; then pip install --use-mirrors cython && pip install --use-mirrors pysam; fi"
+  - "if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install -r requirements/python2.6-requirements.txt; elif [[ $TRAVIS_PYTHON_VERSION == 'pypy' ]]; then pip install -r requirements/pypy-requirements.txt; else pip install -r requirements/common-requirements.txt; fi"
   - python setup.py install
 script: python setup.py test
diff --git a/requirements/common-requirements.txt b/requirements/common-requirements.txt
new file mode 100644
index 0000000..ea364d9
--- /dev/null
+++ b/requirements/common-requirements.txt
@@ -0,0 +1,3 @@
+cython
+pysam
+setuptools
diff --git a/requirements/pypy-requirements.txt b/requirements/pypy-requirements.txt
new file mode 100644
index 0000000..49fe098
--- /dev/null
+++ b/requirements/pypy-requirements.txt
@@ -0,0 +1 @@
+setuptools
diff --git a/requirements/python2.6-requirements.txt b/requirements/python2.6-requirements.txt
new file mode 100644
index 0000000..27c9bc2
--- /dev/null
+++ b/requirements/python2.6-requirements.txt
@@ -0,0 +1,5 @@
+-r common-requirements.txt
+argparse
+counter
+ordereddict
+unittest2
diff --git a/tox.ini b/tox.ini
index d8e584d..953a9dc 100644
--- a/tox.ini
+++ b/tox.ini
@@ -7,28 +7,16 @@
 envlist = py26, py27, py32, py33
 
 [testenv]
+deps =
+    -rrequirements/common-requirements.txt
 commands =
     rm -rf {toxinidir}/build
     python setup.py test
 
 [testenv:py26]
 deps =
-    argparse
-    counter
-    ordereddict
-    cython
-    pysam
-    unittest2
-
-[testenv:py27]
-deps =
-    pysam
-    cython
-
-[testenv:py32]
-deps =
-    cython
+    -rrequirements/python2.6-requirements.txt
 
-[testenv:py33]
+[testenv:pypy]
 deps =
-    cython
+    -rrequirements/pypy-requirements.txt

From 47acb567ddd51e29fb64feddf641593191f2222b Mon Sep 17 00:00:00 2001
From: Chris Lasher <chris.lasher@gmail.com>
Date: Sun, 18 May 2014 22:51:51 -0700
Subject: [PATCH 105/168] Adds _Record.affected_start and .affected_end.

These coordinates should represent the zero-based, half-open region of
the reference sequence affected by all the events included in ALT. These
coordinates allow the user to identify precisely which bases are altered
by the events in the record.

Provides more thorough documentation on the coordinate schemes for
_Record.POS, .start, and .end.
---
 vcf/model.py         |  94 ++++++++++++++++-
 vcf/test/test_vcf.py | 235 ++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 326 insertions(+), 3 deletions(-)

diff --git a/vcf/model.py b/vcf/model.py
index c6e8f42..f0c8a97 100644
--- a/vcf/model.py
+++ b/vcf/model.py
@@ -125,10 +125,45 @@ class _Record(object):
         INFO and FORMAT are available as properties.
 
         The list of genotype calls is in the ``samples`` property.
+
+        Regarding the coordinates associated with each instance:
+
+        - ``POS``, per VCF specification, is the one-based index
+          (the first base of the contig has an index of 1) of the first
+          base of the ``REF`` sequence.
+        - The ``start`` and ``end`` denote the coordinates of the entire
+          ``REF`` sequence in the zero-based, half-open coordinate
+          system (see
+          http://genomewiki.ucsc.edu/index.php/Coordinate_Transforms),
+          where the first base of the contig has an index of 0, and the
+          interval runs up to, but does not include, the base at the
+          ``end`` index. This indexing scheme is analagous to Python
+          slice notation.
+        - The ``affected_start`` and ``affected_end`` coordinates are
+          also in the zero-based, half-open coordinate system. These
+          coordinates indicate the precise region of the reference
+          genome actually affected by the events denoted in ``ALT``
+          (i.e., the minimum ``affected_start`` and maximum
+          ``affected_end``).
+
+          - For SNPs and structural variants, the affected region
+            includes all bases of ``REF``, including the first base
+            (i.e., ``affected_start = start = POS - 1``).
+          - For deletions, the region includes all bases of ``REF``
+            except the first base, which flanks upstream the actual
+            deletion event, per VCF specification.
+          - For insertions, the ``affected_start`` and ``affected_end``
+            coordinates represent a 0 bp-length region between the two
+            flanking bases (i.e., ``affected_start`` =
+            ``affected_end``). This is analagous to Python slice
+            notation (see http://stackoverflow.com/a/2947881/38140).
+            Neither the upstream nor downstream flanking bases are
+            included in the region.
     """
     def __init__(self, CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO, FORMAT,
             sample_indexes, samples=None):
         self.CHROM = CHROM
+        #: the one-based coordinate of the first nucleotide in ``REF``
         self.POS = POS
         self.ID = ID
         self.REF = REF
@@ -137,9 +172,9 @@ def __init__(self, CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO, FORMAT,
         self.FILTER = FILTER
         self.INFO = INFO
         self.FORMAT = FORMAT
-        #: 0-based start coordinate
+        #: zero-based, half-open start coordinate of ``REF``
         self.start = self.POS - 1
-        #: 1-based end coordinate
+        #: zero-based, half-open end coordinate of ``REF``
         self.end = self.start + len(self.REF)
         #: list of alleles. [0] = REF, [1:] = ALTS
         self.alleles = [self.REF]
@@ -148,6 +183,61 @@ def __init__(self, CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO, FORMAT,
         self.samples = samples or []
         self._sample_indexes = sample_indexes
 
+        # Setting affected_start and affected_end here for Sphinx
+        # autodoc purposes...
+        #: zero-based, half-open start coordinate of affected region of reference genome
+        self.affected_start = None
+        #: zero-based, half-open end coordinate of affected region of reference genome (not included in the region)
+        self.affected_end = None
+        self._set_start_and_end()
+
+
+    def _set_start_and_end(self):
+        self.affected_start = self.affected_end = self.POS
+        for alt in self.ALT:
+            if alt is None:
+                start, end = self._compute_coordinates_for_none_alt()
+            elif alt.type == 'SNV':
+                start, end = self._compute_coordinates_for_snp()
+            elif alt.type == 'MNV':
+                start, end = self._compute_coordinates_for_indel()
+            else:
+                start, end = self._compute_coordinates_for_sv()
+            self.affected_start = min(self.affected_start, start)
+            self.affected_end = max(self.affected_end, end)
+
+
+    def _compute_coordinates_for_none_alt(self):
+        start = self.POS - 1
+        end = start + len(self.REF)
+        return (start, end)
+
+
+    def _compute_coordinates_for_snp(self):
+        if len(self.REF) > 1:
+            start = self.POS
+            end = start + (len(self.REF) - 1)
+        else:
+            start = self.POS - 1
+            end = self.POS
+        return (start, end)
+
+
+    def _compute_coordinates_for_indel(self):
+        if len(self.REF) > 1:
+            start = self.POS
+            end = start + (len(self.REF) - 1)
+        else:
+            start = end = self.POS
+        return (start, end)
+
+
+    def _compute_coordinates_for_sv(self):
+        start = self.POS - 1
+        end = start + len(self.REF)
+        return (start, end)
+
+
     # For Python 2
     def __cmp__(self, other):
         return cmp((self.CHROM, self.POS), (getattr(other, "CHROM", None), getattr(other, "POS", None)))
diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index e9d9756..66a5834 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -18,7 +18,7 @@
     pysam = None
 
 import vcf
-from vcf import utils
+from vcf import model, utils
 
 IS_PYTHON2 = sys.version_info[0] == 2
 IS_NOT_PYPY = 'PyPy' not in sys.version
@@ -765,6 +765,239 @@ def test_pickle(self):
             self.assertEqual(cPickle.loads(cPickle.dumps(var)), var)
 
 
+    def assert_has_expected_coordinates(
+            self,
+            record,
+            expected_coordinates,
+            expected_affected_coordinates
+        ):
+        self.assertEqual(
+                (record.start, record.end),
+                expected_coordinates
+        )
+        self.assertEqual(
+                (record.affected_start, record.affected_end),
+                expected_affected_coordinates
+        )
+
+
+    def test_coordinates_for_snp(self):
+        record = model._Record(
+                '1',
+                10,
+                'id1',
+                'C',
+                [model._Substitution('A')],
+                None,
+                None,
+                {},
+                None,
+                {},
+                None
+        )
+        self.assert_has_expected_coordinates(record, (9, 10), (9, 10))
+
+
+    def test_coordinates_for_insertion(self):
+        record = model._Record(
+                '1',
+                10,
+                'id2',
+                'C',
+                [model._Substitution('CTA')],
+                None,
+                None,
+                {},
+                None,
+                {},
+                None
+        )
+        self.assert_has_expected_coordinates(record, (9, 10), (10, 10))
+
+
+    def test_coordinates_for_deletion(self):
+        record = model._Record(
+                '1',
+                10,
+                'id3',
+                'CTA',
+                [model._Substitution('C')],
+                None,
+                None,
+                {},
+                None,
+                {},
+                None
+        )
+        self.assert_has_expected_coordinates(record, (9, 12), (10, 12))
+
+
+    def test_coordinates_for_None_alt(self):
+        record = model._Record(
+                '1',
+                10,
+                'id4',
+                'C',
+                [None],
+                None,
+                None,
+                {},
+                None,
+                {},
+                None
+        )
+        self.assert_has_expected_coordinates(record, (9, 10), (9, 10))
+
+
+    def test_coordinates_for_multiple_snps(self):
+        record = model._Record(
+                '1',
+                10,
+                'id5',
+                'C',
+                [
+                    model._Substitution('A'),
+                    model._Substitution('G'),
+                    model._Substitution('T')
+                ],
+                None,
+                None,
+                {},
+                None,
+                {},
+                None
+        )
+        self.assert_has_expected_coordinates(record, (9, 10), (9, 10))
+
+
+    def test_coordinates_for_insert_and_snp(self):
+        record = model._Record(
+                '1',
+                10,
+                'id6',
+                'C',
+                [
+                    model._Substitution('GTA'),
+                    model._Substitution('G'),
+                ],
+                None,
+                None,
+                {},
+                None,
+                {},
+                None
+        )
+        self.assert_has_expected_coordinates(record, (9, 10), (9, 10))
+        record = model._Record(
+                '1',
+                10,
+                'id7',
+                'C',
+                [
+                    model._Substitution('G'),
+                    model._Substitution('GTA'),
+                ],
+                None,
+                None,
+                {},
+                None,
+                {},
+                None
+        )
+        self.assert_has_expected_coordinates(record, (9, 10), (9, 10))
+
+
+    def test_coordinates_for_snp_and_deletion(self):
+        record = model._Record(
+                '1',
+                10,
+                'id8',
+                'CTA',
+                [
+                    model._Substitution('C'),
+                    model._Substitution('CTG'),
+                ],
+                None,
+                None,
+                {},
+                None,
+                {},
+                None
+        )
+        self.assert_has_expected_coordinates(record, (9, 12), (10, 12))
+        record = model._Record(
+                '1',
+                10,
+                'id9',
+                'CTA',
+                [
+                    model._Substitution('CTG'),
+                    model._Substitution('C'),
+                ],
+                None,
+                None,
+                {},
+                None,
+                {},
+                None
+        )
+        self.assert_has_expected_coordinates(record, (9, 12), (10, 12))
+
+
+    def test_coordinates_for_insertion_and_deletion(self):
+        record = model._Record(
+                '1',
+                10,
+                'id10',
+                'CT',
+                [
+                    model._Substitution('CA'),
+                    model._Substitution('CTT'),
+                ],
+                None,
+                None,
+                {},
+                None,
+                {},
+                None
+        )
+        self.assert_has_expected_coordinates(record, (9, 11), (10, 11))
+        record = model._Record(
+                '1',
+                10,
+                'id11',
+                'CT',
+                [
+                    model._Substitution('CTT'),
+                    model._Substitution('CA'),
+                ],
+                None,
+                None,
+                {},
+                None,
+                {},
+                None
+        )
+        self.assert_has_expected_coordinates(record, (9, 11), (10, 11))
+
+
+    def test_coordinates_for_breakend(self):
+        record = model._Record(
+                '1',
+                10,
+                'id12',
+                'CTA',
+                [model._Breakend('1', 500, False, True, 'GGTC', True)],
+                None,
+                None,
+                {},
+                None,
+                {},
+                None
+        )
+        self.assert_has_expected_coordinates(record, (9, 12), (9, 12))
+
+
 class TestCall(unittest.TestCase):
 
     def test_dunder_eq(self):

From 2f0d57706e67c7d2b13d1e527cae2201018ffc14 Mon Sep 17 00:00:00 2001
From: Martijn Vermaat <martijn@vermaat.name>
Date: Wed, 25 Jun 2014 22:41:27 +0200
Subject: [PATCH 106/168] Allow flag INFO field to be declared as string

As reported in #164, we previously crashed on flag INFO fields declared
as strings (and the number of values declared as 1). This is indeed not
according to spec, but we should probably allow it anyway.
---
 vcf/parser.py               |  1 +
 vcf/test/string_as_flag.vcf |  8 ++++++++
 vcf/test/test_vcf.py        | 11 +++++++++++
 3 files changed, 20 insertions(+)
 create mode 100644 vcf/test/string_as_flag.vcf

diff --git a/vcf/parser.py b/vcf/parser.py
index 39d1f8a..244e8f5 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -390,6 +390,7 @@ def _parse_info(self, info_str):
                     vals = entry[1].split(',') # commas are reserved characters indicating multiple values
                     val = self._map(str, vals)
                 except IndexError:
+                    entry_type = 'Flag'
                     val = True
 
             try:
diff --git a/vcf/test/string_as_flag.vcf b/vcf/test/string_as_flag.vcf
new file mode 100644
index 0000000..afa3b0d
--- /dev/null
+++ b/vcf/test/string_as_flag.vcf
@@ -0,0 +1,8 @@
+##fileformat=VCFv4.1
+##INFO=<ID=AB,Number=1,Type=String,Description="Alt Base">
+##INFO=<ID=CD,Number=.,Type=String,Description="Alt Base">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	test
+chr2	21	.	A	G	.	.		GT	.
+chr2	24	.	G	T	.	.	AB	GT	.
+chr2	48	.	C	T	.	.	CD	GT	.
+chr2	75	.	T	C	.	.	AB;CD	GT	.
diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index b096e47..2017653 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -262,6 +262,16 @@ def test_contig_line(self):
         self.assertEqual(reader.contigs['1'].length, 249250621)
 
 
+class TestStringAsFlag(unittest.TestCase):
+
+    def test_string_as_flag(self):
+        """A flag INFO field is declared as string (not allowed by the spec,
+        but seen in practice)."""
+        reader = vcf.Reader(fh('string_as_flag.vcf', 'r'))
+        for _ in reader:
+            pass
+
+
 class TestInfoOrder(unittest.TestCase):
 
     def _assert_order(self, definitions, fields):
@@ -1339,6 +1349,7 @@ def test_meta(self):
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(Test1kg))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(Test1kgSites))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestGoNL))
+suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestStringAsFlag))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestInfoOrder))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestInfoTypeCharacter))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestGatkOutputWriter))

From d927381018650767f63242ac86aa31294a7c8939 Mon Sep 17 00:00:00 2001
From: Martijn Vermaat <martijn@vermaat.name>
Date: Wed, 25 Jun 2014 22:45:31 +0200
Subject: [PATCH 107/168] Don't crash when FORMAT is set to the missing value
 (.)

It is not valid according to the spec, but issue #164 shows a VCF file
where the FORMAT column contains just a dot character. We have no way
of interpreting the subsequent genotype columns in that case, so this
patch ignores them.
---
 vcf/parser.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/vcf/parser.py b/vcf/parser.py
index 39d1f8a..99baf4e 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -559,6 +559,9 @@ def next(self):
             fmt = row[8]
         except IndexError:
             fmt = None
+        else:
+            if fmt == '.':
+                fmt = None
 
         record = _Record(chrom, pos, ID, ref, alt, qual, filt,
                 info, fmt, self._sample_indexes)

From e7d350b23bd1cff87cce768af44f296d3f868108 Mon Sep 17 00:00:00 2001
From: Martijn Vermaat <martijn@vermaat.name>
Date: Sun, 6 Jul 2014 18:32:15 +0200
Subject: [PATCH 108/168] Don't crash on metadata lines without value

The spec actually does not allow for metadata lines without value, but we
shouldn't crash on them.

Fixes #168
---
 vcf/parser.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/vcf/parser.py b/vcf/parser.py
index d957e31..9e4f739 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -220,9 +220,13 @@ def read_meta_hash(self, meta_string):
     def read_meta(self, meta_string):
         if re.match("##.+=<", meta_string):
             return self.read_meta_hash(meta_string)
-        else:
-            match = self.meta_pattern.match(meta_string)
-            return match.group('key'), match.group('val')
+        match = self.meta_pattern.match(meta_string)
+        if not match:
+            # Spec only allows key=value, but we try to be liberal and
+            # interpret anything else as key=none (and all values are parsed
+            # as strings).
+            return meta_string.lstrip('#'), 'none'
+        return match.group('key'), match.group('val')
 
 
 class Reader(object):

From c8f3f8d5fe9bdee2049e45bc09d01037e30d1aee Mon Sep 17 00:00:00 2001
From: Martijn Vermaat <martijn@vermaat.name>
Date: Tue, 9 Sep 2014 17:23:01 +0200
Subject: [PATCH 109/168] Temporarily fix pysam on 0.7.8 (0.8.0 fails on Python
 3)

Until we figure out what causes this, let's keep the test suite working by
pinning pysam to the latest release that is known to work.

Traceback:

    Traceback (most recent call last):
      File "/home/travis/build/jamescasbon/PyVCF/build/lib.linux-x86_64-3.3/vcf/test/test_vcf.py", line 1109, in testNoVariantsInRange
        fetched_variants = self.reader.fetch('20', 14370, 17329)
      File "/home/travis/build/jamescasbon/PyVCF/build/lib.linux-x86_64-3.3/vcf/parser.py", line 623, in fetch
        self.reader = self._tabix.fetch(chrom, start, end)
      File "ctabix.pyx", line 345, in pysam.ctabix.Tabixfile.fetch (pysam/ctabix.c:4241)
    TypeError: expected bytes, str found

See #175
---
 requirements/common-requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements/common-requirements.txt b/requirements/common-requirements.txt
index ea364d9..7bbf965 100644
--- a/requirements/common-requirements.txt
+++ b/requirements/common-requirements.txt
@@ -1,3 +1,3 @@
 cython
-pysam
+pysam==0.7.8
 setuptools

From eafd842064d5f9ddbfc5fa1c7fa4bd3761feafdf Mon Sep 17 00:00:00 2001
From: Martijn Vermaat <martijn@vermaat.name>
Date: Tue, 9 Sep 2014 14:02:36 +0200
Subject: [PATCH 110/168] Partial support for VCFv4.2

- Add R as an INFO field count (number of alleles including reference).
- Support the optional Source and Version keys on INFO metainformation.

Thanks a lot @travc for contributing these fixes!

See #172
---
 vcf/parser.py            | 14 ++++++----
 vcf/test/example-4.2.vcf | 56 ++++++++++++++++++++++++++++++++++++++++
 vcf/test/test_vcf.py     | 20 ++++++++++++++
 3 files changed, 85 insertions(+), 5 deletions(-)
 create mode 100644 vcf/test/example-4.2.vcf

diff --git a/vcf/parser.py b/vcf/parser.py
index 9e4f739..6d668af 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -63,12 +63,13 @@
 # Conversion between value in file and Python value
 field_counts = {
     '.': None,  # Unknown number of values
-    'A': -1,  # Equal to the number of alleles in a given record
+    'A': -1,  # Equal to the number of alternate alleles in a given record
     'G': -2,  # Equal to the number of genotypes in a given record
+    'R': -3,  # Equal to the number of alleles including reference in a given record
 }
 
 
-_Info = collections.namedtuple('Info', ['id', 'num', 'type', 'desc'])
+_Info = collections.namedtuple('Info', ['id', 'num', 'type', 'desc', 'source', 'version'])
 _Filter = collections.namedtuple('Filter', ['id', 'desc'])
 _Alt = collections.namedtuple('Alt', ['id', 'desc'])
 _Format = collections.namedtuple('Format', ['id', 'num', 'type', 'desc'])
@@ -82,9 +83,11 @@ def __init__(self):
         super(_vcf_metadata_parser, self).__init__()
         self.info_pattern = re.compile(r'''\#\#INFO=<
             ID=(?P<id>[^,]+),
-            Number=(?P<number>-?\d+|\.|[AG]),
+            Number=(?P<number>-?\d+|\.|[AGR]),
             Type=(?P<type>Integer|Float|Flag|Character|String),
             Description="(?P<desc>[^"]*)"
+            (?:,Source="(?P<source>[^"]*)")?
+            (?:,Version="?(?P<version>[^"]*)"?)?
             >''', re.VERBOSE)
         self.filter_pattern = re.compile(r'''\#\#FILTER=<
             ID=(?P<id>[^,]+),
@@ -96,7 +99,7 @@ def __init__(self):
             >''', re.VERBOSE)
         self.format_pattern = re.compile(r'''\#\#FORMAT=<
             ID=(?P<id>.+),
-            Number=(?P<number>-?\d+|\.|[AG]),
+            Number=(?P<number>-?\d+|\.|[AGR]),
             Type=(?P<type>.+),
             Description="(?P<desc>.*)"
             >''', re.VERBOSE)
@@ -126,7 +129,8 @@ def read_info(self, info_string):
         num = self.vcf_field_count(match.group('number'))
 
         info = _Info(match.group('id'), num,
-                     match.group('type'), match.group('desc'))
+                     match.group('type'), match.group('desc'),
+                     match.group('source'), match.group('version'))
 
         return (match.group('id'), info)
 
diff --git a/vcf/test/example-4.2.vcf b/vcf/test/example-4.2.vcf
new file mode 100644
index 0000000..d649fc3
--- /dev/null
+++ b/vcf/test/example-4.2.vcf
@@ -0,0 +1,56 @@
+##fileformat=VCFv4.2
+##FILTER=<ID=PASS,Description="All filters passed">
+##samtoolsVersion=1.0-17-gfaf4dd6+htslib-1.0-11-g830ea73
+##samtoolsCommand=samtools mpileup -u -t DP,DPR,DV,DP4,INFO/DPR,SP -f /data/archive/reference/Anopheles-arabiensis-Dongola_SCAFFOLDS_AaraD1.fa -r KB704451:0004153102-0004172483 huge_list_of_bam_files_removed
+##reference=file:///data/archive/reference/Anopheles-arabiensis-Dongola_SCAFFOLDS_AaraD1.fa
+##contig=<ID=KB704451,length=13998812>
+##ALT=<ID=X,Description="Represents allele(s) other than observed.">
+##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">
+##INFO=<ID=IDV,Number=1,Type=Integer,Description="Maximum number of reads supporting an indel">
+##INFO=<ID=IMF,Number=1,Type=Float,Description="Maximum fraction of reads supporting an indel">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Raw read depth">
+##INFO=<ID=VDB,Number=1,Type=Float,Description="Variant Distance Bias for filtering splice-site artefacts in RNA-seq data (bigger is better)",Version=3>
+##INFO=<ID=RPB,Number=1,Type=Float,Description="Mann-Whitney U test of Read Position Bias (bigger is better)">
+##INFO=<ID=MQB,Number=1,Type=Float,Description="Mann-Whitney U test of Mapping Quality Bias (bigger is better)">
+##INFO=<ID=BQB,Number=1,Type=Float,Description="Mann-Whitney U test of Base Quality Bias (bigger is better)">
+##INFO=<ID=MQSB,Number=1,Type=Float,Description="Mann-Whitney U test of Mapping Quality vs Strand Bias (bigger is better)">
+##INFO=<ID=SGB,Number=1,Type=Float,Description="Segregation based metric.">
+##INFO=<ID=MQ0F,Number=1,Type=Float,Description="Fraction of MQ0 reads (smaller is better)">
+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="List of Phred-scaled genotype likelihoods">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Number of high-quality bases">
+##FORMAT=<ID=DV,Number=1,Type=Integer,Description="Number of high-quality non-reference bases">
+##FORMAT=<ID=DPR,Number=R,Type=Integer,Description="Number of high-quality bases observed for each allele">
+##INFO=<ID=DPR,Number=R,Type=Integer,Description="Number of high-quality bases observed for each allele">
+##FORMAT=<ID=DP4,Number=4,Type=Integer,Description="Number of high-quality ref-fwd, ref-reverse, alt-fwd and alt-reverse bases">
+##FORMAT=<ID=SP,Number=1,Type=Integer,Description="Phred-scaled strand bias P-value">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Phred-scaled Genotype Quality">
+##FORMAT=<ID=GP,Number=G,Type=Float,Description="Phred-scaled genotype posterior probabilities">
+##INFO=<ID=ICB,Number=1,Type=Float,Description="Inbreeding Coefficient Binomial test (bigger is better)">
+##INFO=<ID=HOB,Number=1,Type=Float,Description="Bias in the number of HOMs number (smaller is better)">
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes for each ALT allele, in the same order as listed">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=DP4,Number=4,Type=Integer,Description="Number of high-quality ref-forward , ref-reverse, alt-forward and alt-reverse bases">
+##INFO=<ID=MQ,Number=1,Type=Integer,Description="Average mapping quality">
+##bcftools_callVersion=1.0-55-gc661821+htslib-1.0-11-g830ea73
+##bcftools_callCommand=call -m -vM -f GQ,GP
+##SnpSiftVersion="SnpSift 3.6c (build 2014-05-20), by Pablo Cingolani"
+##SnpSiftCmd="SnpSift varType - "
+##INFO=<ID=VARTYPE,Number=A,Type=Flag,Description="Variant types {SNP,MNP,INS,DEL,Mixed}">
+##INFO=<ID=SNP,Number=0,Type=Flag,Description="Variant is a SNP">
+##INFO=<ID=MNP,Number=0,Type=Flag,Description="Variant is an MNP">
+##INFO=<ID=INS,Number=0,Type=Flag,Description="Variant is an insertion">
+##INFO=<ID=DEL,Number=0,Type=Flag,Description="Variant is an deletion">
+##INFO=<ID=MIXED,Number=0,Type=Flag,Description="Variant is mixture of INS/DEL/SNP/MNP">
+##INFO=<ID=HOM,Number=0,Type=Flag,Description="Variant is homozygous">
+##INFO=<ID=HET,Number=0,Type=Flag,Description="Variant is heterozygous">
+##INFO=<ID=VARTYPE,Number=A,Type=String,Description="Comma separated list of variant types. One per allele">
+#CHROM  POS ID  REF ALT QUAL    FILTER  INFO    FORMAT  LUPI059 MINE001 OKJ042  LUPI001 LUPI007 LUPI024 LUPI056 LUPI071 LUPI074 LUPI082 MINE040 MINE100 MINE101 MINE105 MINE111 OKJ017  OKJ045  OKJ070  SAGA066 SAGA107 SAGA131 SAGA133 SAGA134 SAGA141 2012L_LUPI_002  2012L_LUPI_015  2012L_LUPI_017  2012L_LUPI_018  2012L_LUPI_035  2012L_LUPI_062  2012L_LUPI_065  2012L_LUPI_077  2012L_LUPI_083  2012L_LUPI_116  2012L_LUPI_013  2012L_LUPI_041  2012L_LUPI_068  2012L_LUPI_096  2012L_LUPI_098  2012L_LUPI_101  2012L_LUPI_103  2012_LUPI_156   2012_LUPI_157   2012_LUPI_161   2012_LUPI_171   2012_LUPI_173   2012_LUPI_180   2012L_LUPI_010  2012L_LUPI_012  2012L_LUPI_021  2012L_LUPI_045  2012L_LUPI_047  2012L_LUPI_060  2012L_LUPI_061  2012L_LUPI_067  2012_LUPI_125   2012_LUPI_129   2012_LUPI_146   2012_LUPI_178   2012_LUPI_211   2012_LUPI_277   2012_LUPI_278   2012_LUPI_279   2012_LUPI_284
+KB704451    4157846 .   N   A,C 167.0   .   DP=10;VDB=1.17174e-06;SGB=1.26353;MQ0F=0;DPR=0,6,4;AC=10,4;AN=14;DP4=0,0,10,0;MQ=60;SNP;VARTYPE=SNP,SNP GT:PL:DP:DV:SP:DP4:DPR:GP:GQ    1/2:74,23,14,57,0,54:4:4:0:0,0,4,0:0,3,1:144,56,16,90,0,57:16   1/2:26,26,26,3,3,0:1:1:0:0,0,1,0:0,0,1:95,58,28,36,2,3:3    ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   1/1:26,3,0,26,3,26:1:1:0:0,0,1,0:0,1,0:96,36,2,60,3,29:3    ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   1/1:26,3,0,26,3,26:1:1:0:0,0,1,0:0,1,0:96,36,2,60,3,29:3    ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   1/2:26,26,26,3,3,0:1:1:0:0,0,1,0:0,0,1:95,58,28,36,2,3:3    ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   1/1:26,3,0,26,3,26:1:1:0:0,0,1,0:0,1,0:96,36,2,60,3,29:3    ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   1/2:26,26,26,3,3,0:1:1:0:0,0,1,0:0,0,1:95,58,28,36,2,3:3    ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   
./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0
+KB704451    4157870 .   T   C   275.0   .   DP=243;VDB=0.00023935;SGB=29.4468;RPB=0.0368658;MQB=0.979612;MQSB=0.268441;BQB=0.99223;MQ0F=0;DPR=213,19;ICB=0.85092;HOB=0.0287274;AC=6;AN=118;DP4=201,12,19,1;MQ=53;SNP;VARTYPE=SNP    GT:PL:DP:DV:SP:DP4:DPR:GP:GQ    0/0:0,66,255:22:0:0:20,2,0,0:22,0:0,75,279:75   0/0:0,12,120:4:0:0:4,0,0,0:4,0:0,21,144:21  1/1:193,36,0:12:12:0:0,0,12,0:0,12:168,20,0:20  0/0:0,9,95:3:0:0:3,0,0,0:3,0:0,18,119:18    0/1:78,0,110:7:3:0:3,1,3,0:4,3:68,0,125:68  0/0:0,3,40:1:0:0:1,0,0,0:1,0:0,12,64:12 0/0:0,6,72:2:0:0:2,0,0,0:2,0:0,15,96:15 0/0:0,9,90:3:0:0:3,0,0,0:3,0:0,18,114:18    0/0:0,12,122:4:0:0:4,0,0,0:4,0:0,21,146:21  0/0:0,9,97:3:0:0:3,0,0,0:3,0:0,18,121:18    0/0:0,15,122:5:0:0:5,0,0,0:5,0:0,24,146:24  0/0:0,6,71:2:0:0:2,0,0,0:2,0:0,15,95:15 0/0:0,6,58:2:0:0:2,0,0,0:2,0:0,15,82:15 0/0:0,18,155:6:0:0:6,0,0,0:6,0:0,27,179:27  0/0:0,3,39:1:0:0:1,0,0,0:1,0:0,12,63:12 0/1:35,3,0:1:1:0:0,0,1,0:0,1:23,0,12:12 0/0:0,9,87:3:0:0:3,0,0,0:3,0:0,18,111:18    0/1:47,0,104:6:2:0:4,0,2,0:4,2:37,0,119:37  0/0:0,21,160:7:0:0:7,0,0,0:7,0:0,30,184:30  0/0:0,6,35:2:0:0:2,0,0,0:2,0:0,15,59:15 0/0:0,12,98:4:0:0:4,0,0,0:4,0:0,21,122:21   0/0:0,6,70:2:0:0:2,0,0,0:2,0:0,15,94:15 0/0:0,6,66:2:0:0:2,0,0,0:2,0:0,15,90:15 0/0:0,12,122:4:0:0:4,0,0,0:4,0:0,21,146:21  0/0:0,3,29:1:0:0:0,1,0,0:1,0:0,12,53:12 0/0:0,6,72:2:0:0:2,0,0,0:2,0:0,15,96:15 0/0:0,9,76:3:0:0:3,0,0,0:3,0:0,18,100:18    0/0:0,15,136:5:0:0:5,0,0,0:5,0:0,24,160:24  0/0:0,30,182:10:0:0:10,0,0,0:10,0:0,39,206:39   0/0:0,6,66:2:0:0:2,0,0,0:2,0:0,15,90:15 0/0:0,6,69:2:0:0:2,0,0,0:2,0:0,15,93:15 0/0:0,27,152:9:0:0:9,0,0,0:9,0:0,36,176:36  ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,94:2:0:0:2,0,0,0:2,0:0,15,118:15    0/0:0,21,195:7:0:0:5,2,0,0:7,0:0,30,219:30  0/0:0,9,92:3:0:0:2,1,0,0:3,0:0,18,116:18    0/1:33,0,18:2:1:0:0,1,1,0:1,1:23,0,33:23    0/0:0,3,35:1:0:0:1,0,0,0:1,0:0,12,59:12 0/0:0,9,91:3:0:0:3,0,0,0:3,0:0,18,115:18    
0/0:0,3,36:1:0:0:1,0,0,0:1,0:0,12,60:12 0/0:0,30,212:10:0:0:9,1,0,0:10,0:0,39,236:39    0/0:0,9,89:3:0:0:3,0,0,0:3,0:0,18,113:18    ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,21,195:7:0:0:7,0,0,0:7,0:0,30,219:30  ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,9,97:3:0:0:2,1,0,0:3,0:0,18,121:18    0/0:0,9,93:3:0:0:3,0,0,0:3,0:0,18,117:18    0/0:0,9,116:3:0:0:3,0,0,0:3,0:0,18,140:18   0/0:0,6,71:2:0:0:1,1,0,0:2,0:0,15,95:15 0/0:0,9,89:3:0:0:3,0,0,0:3,0:0,18,113:18    0/0:0,33,175:11:0:0:11,0,0,0:11,0:0,42,199:42   0/0:0,6,63:2:0:0:2,0,0,0:2,0:0,15,87:15 0/0:0,21,145:7:0:0:7,0,0,0:7,0:0,30,169:30  0/0:0,3,39:1:0:0:0,1,0,0:1,0:0,12,63:12 0/0:0,9,84:3:0:0:3,0,0,0:3,0:0,18,108:18    0/0:0,3,13:1:0:0:1,0,0,0:1,0:0,12,37:12 0/0:0,3,23:1:0:0:1,0,0,0:1,0:0,12,47:12 0/0:0,12,106:4:0:0:4,0,0,0:4,0:0,21,130:21  0/0:0,3,36:1:0:0:1,0,0,0:1,0:0,12,60:12 0/0:0,9,94:3:0:0:3,0,0,0:3,0:0,18,118:18    0/0:0,6,67:2:0:0:2,0,0,0:2,0:0,15,91:15 0/0:2,5,27:2:1:0:1,0,0,1:1,0:0,12,49:12
+KB704451    4157877 .   G   A   999.0   .   DP=250;VDB=6.58963e-09;SGB=31.659;RPB=0.0227135;MQB=0.410318;MQSB=0.139343;BQB=0.0767891;MQ0F=0;DPR=188,48;ICB=0.990841;HOB=0.00761276;AC=17;AN=118;DP4=176,12,45,3;MQ=55;SNP;VARTYPE=SNP   GT:PL:DP:DV:SP:DP4:DPR:GP:GQ    0/1:159,0,202:22:9:0:12,1,8,1:13,9:154,0,212:127    0/0:0,12,120:4:0:0:4,0,0,0:4,0:0,16,134:16  0/0:0,51,207:17:0:0:17,0,0,0:17,0:0,55,221:55   0/0:0,9,98:3:0:0:3,0,0,0:3,0:0,13,112:13    0/1:123,0,61:8:5:0:3,0,4,1:3,5:118,0,71:71  0/0:0,3,38:1:0:0:0,1,0,0:1,0:0,7,52:7   0/1:68,0,29:3:2:0:1,0,1,1:1,2:63,0,39:39    0/0:0,6,69:2:0:0:2,0,0,0:2,0:0,10,83:10 0/0:0,12,119:4:0:0:4,0,0,0:4,0:0,16,133:16  0/1:34,0,34:2:1:0:1,0,1,0:1,1:29,0,44:29    0/1:24,0,99:5:1:0:4,0,1,0:4,1:19,0,109:19   0/1:34,0,28:2:1:0:1,0,1,0:1,1:29,0,38:29    0/0:0,6,58:2:0:0:2,0,0,0:2,0:0,10,72:10 0/1:122,0,57:7:4:0:3,0,4,0:3,4:117,0,67:67  0/0:0,3,41:1:0:0:1,0,0,0:1,0:0,7,55:7   0/0:0,3,29:1:0:0:1,0,0,0:1,0:0,7,43:7   0/0:0,12,105:4:0:0:4,0,0,0:4,0:0,16,119:16  0/0:0,18,144:6:0:0:6,0,0,0:6,0:0,22,158:22  0/1:118,0,63:8:5:0:3,0,5,0:3,5:113,0,73:73  0/0:0,6,34:2:0:0:2,0,0,0:2,0:0,10,48:10 0/0:0,15,131:5:0:0:5,0,0,0:5,0:0,19,145:19  0/0:0,6,72:2:0:0:2,0,0,0:2,0:0,10,86:10 0/0:0,6,89:2:0:0:2,0,0,0:2,0:0,10,103:10    1/1:124,12,0:4:4:0:0,0,4,0:0,4:112,4,2:4    0/0:0,3,34:1:0:0:0,1,0,0:1,0:0,7,48:7   0/0:0,6,73:2:0:0:2,0,0,0:2,0:0,10,87:10 0/0:0,9,91:3:0:0:3,0,0,0:3,0:0,13,105:13    0/0:0,15,138:5:0:0:5,0,0,0:5,0:0,19,152:19  0/0:0,30,179:10:0:0:10,0,0,0:10,0:0,34,193:34   0/0:0,6,65:2:0:0:2,0,0,0:2,0:0,10,79:10 0/0:0,6,70:2:0:0:2,0,0,0:2,0:0,10,84:10 0/0:0,27,155:9:0:0:9,0,0,0:9,0:0,31,169:31  ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,94:2:0:0:2,0,0,0:2,0:0,10,108:10    0/0:0,15,161:5:0:0:3,2,0,0:5,0:0,19,175:19  0/0:0,6,72:2:0:0:1,1,0,0:2,0:0,10,86:10 0/0:0,6,65:2:0:0:1,1,0,0:2,0:0,10,79:10 0/1:36,3,0:1:1:0:0,0,1,0:0,1:29,0,7:7   0/0:0,9,93:3:0:0:3,0,0,0:3,0:0,13,107:13    
0/0:0,3,34:1:0:0:1,0,0,0:1,0:0,7,48:7   0/1:87,0,137:10:4:0:5,1,4,0:6,4:82,0,147:82 0/1:57,0,26:3:2:0:1,0,2,0:1,2:52,0,36:35    ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/1:139,0,73:7:4:0:3,0,4,0:3,4:134,0,83:83  ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,75:2:0:0:1,1,0,0:2,0:0,10,89:10 0/1:90,9,0:3:3:0:0,0,3,0:0,3:79,2,3:3   0/0:0,9,98:3:0:0:3,0,0,0:3,0:0,13,112:13    0/0:0,6,72:2:0:0:1,1,0,0:2,0:0,10,86:10 0/0:0,9,88:3:0:0:3,0,0,0:3,0:0,13,102:13    0/0:0,33,173:11:0:0:11,0,0,0:11,0:0,37,187:37   0/0:0,6,57:2:0:0:2,0,0,0:2,0:0,10,71:10 0/0:0,15,125:5:0:0:5,0,0,0:5,0:0,19,139:19  0/0:0,6,61:2:0:0:1,1,0,0:2,0:0,10,75:10 0/1:24,0,51:3:1:0:2,0,1,0:2,1:19,0,61:19    0/0:0,3,30:1:0:0:1,0,0,0:1,0:0,7,44:7   0/0:0,3,23:1:0:0:1,0,0,0:1,0:0,7,37:7   0/0:0,12,105:4:0:0:4,0,0,0:4,0:0,16,119:16  0/0:0,3,35:1:0:0:1,0,0,0:1,0:0,7,49:7   0/1:25,0,61:3:1:0:2,0,1,0:2,1:20,0,71:20    0/0:0,6,67:2:0:0:2,0,0,0:2,0:0,10,81:10 0/0:0,3,8:1:0:0:0,1,0,0:1,0:0,7,22:7
+KB704451    4157907 .   A   C   278.0   .   DP=295;VDB=0.241276;SGB=26.7514;RPB=0.676983;MQB=0.997838;MQSB=0.136536;BQB=0.45683;MQ0F=0;DPR=264,15;ICB=0.00518819;HOB=0.00237812;AC=4;AN=116;DP4=233,31,14,1;MQ=59;SNP;VARTYPE=SNP   GT:PL:DP:DV:SP:DP4:DPR:GP:GQ    0/0:0,90,255:30:0:0:25,5,0,0:30,0:0,101,283:101 0/0:0,30,201:10:0:0:9,1,0,0:10,0:0,41,229:41    0/1:157,0,188:18:8:0:10,0,7,1:10,8:145,0,205:127    0/1:75,0,90:5:2:0:2,1,2,0:3,2:63,0,107:63   0/0:0,30,201:10:0:0:9,1,0,0:10,0:0,41,229:41    0/0:0,6,80:2:0:0:1,1,0,0:2,0:0,17,108:17    0/0:0,12,134:4:0:0:3,1,0,0:4,0:0,23,162:23  0/0:0,3,33:1:0:0:1,0,0,0:1,0:0,14,61:14 0/0:0,21,160:7:0:0:7,0,0,0:7,0:0,32,188:32  0/0:0,12,135:4:0:0:2,2,0,0:4,0:0,23,163:23  0/0:0,15,148:5:0:0:5,0,0,0:5,0:0,26,176:26  0/0:0,9,82:3:0:0:3,0,0,0:3,0:0,20,110:20    0/1:70,0,19:4:3:0:1,0,3,0:1,3:58,0,36:36    0/0:0,24,246:8:0:0:7,1,0,0:8,0:0,35,274:35  0/0:0,18,147:6:0:0:6,0,0,0:6,0:0,29,175:29  ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,9,82:3:0:0:3,0,0,0:3,0:0,20,110:20    0/1:59,0,62:5:2:0:3,0,2,0:3,2:47,0,79:47    0/0:0,33,192:11:0:0:11,0,0,0:11,0:0,44,220:44   0/0:0,9,94:3:0:0:3,0,0,0:3,0:0,20,122:20    0/0:0,24,198:8:0:0:7,1,0,0:8,0:0,35,226:35  0/0:0,12,120:4:0:0:4,0,0,0:4,0:0,23,148:23  0/0:0,15,165:5:0:0:3,2,0,0:5,0:0,26,193:26  0/0:0,24,172:8:0:0:8,0,0,0:8,0:0,35,200:35  0/0:0,3,31:1:0:0:0,1,0,0:1,0:0,14,59:14 0/0:0,6,64:2:0:0:2,0,0,0:2,0:0,17,92:17 0/0:0,6,66:2:0:0:2,0,0,0:2,0:0,17,94:17 0/0:0,15,118:5:0:0:5,0,0,0:5,0:0,26,146:26  0/0:0,33,178:11:0:0:11,0,0,0:11,0:0,44,206:44   0/0:0,3,35:1:0:0:1,0,0,0:1,0:0,14,63:14 0/0:0,9,74:3:0:0:2,1,0,0:3,0:0,20,102:20    0/0:0,21,168:7:0:0:5,2,0,0:7,0:0,32,196:32  0/0:0,3,40:1:0:0:1,0,0,0:1,0:0,14,68:14 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,89:2:0:0:2,0,0,0:2,0:0,17,117:17    0/0:0,12,130:4:0:0:2,2,0,0:4,0:0,23,158:23  0/0:0,9,78:3:0:0:1,2,0,0:3,0:0,20,106:20    0/0:0,6,65:2:0:0:1,1,0,0:2,0:0,17,93:17 0/0:0,3,35:1:0:0:1,0,0,0:1,0:0,14,63:14 0/0:0,6,55:2:0:0:2,0,0,0:2,0:0,17,83:17 
0/0:0,3,29:1:0:0:1,0,0,0:1,0:0,14,57:14 0/0:0,36,194:12:0:0:11,1,0,0:12,0:0,47,222:47   0/0:0,12,110:4:0:0:4,0,0,0:4,0:0,23,138:23  ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,18,182:6:0:0:6,0,0,0:6,0:0,29,210:29  ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,3,34:1:0:0:0,1,0,0:1,0:0,14,62:14 0/0:0,6,68:2:0:0:2,0,0,0:2,0:0,17,96:17 0/0:0,9,107:3:0:0:3,0,0,0:3,0:0,20,135:20   0/0:0,6,67:2:0:0:1,1,0,0:2,0:0,17,95:17 0/0:0,6,68:2:0:0:2,0,0,0:2,0:0,17,96:17 0/0:0,27,184:9:0:0:9,0,0,0:9,0:0,38,212:38  0/0:0,9,85:3:0:0:3,0,0,0:3,0:0,20,113:20    0/0:0,12,111:4:0:0:4,0,0,0:4,0:0,23,139:23  0/0:0,6,77:2:0:0:1,1,0,0:2,0:0,17,105:17    0/0:0,12,108:4:0:0:3,1,0,0:4,0:0,23,136:23  0/0:0,3,27:1:0:0:1,0,0,0:1,0:0,14,55:14 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,64:2:0:0:2,0,0,0:2,0:0,17,92:17 0/0:0,3,33:1:0:0:1,0,0,0:1,0:0,14,61:14 0/0:0,12,125:4:0:0:4,0,0,0:4,0:0,23,153:23  0/0:0,9,98:3:0:0:2,1,0,0:3,0:0,20,126:20    0/0:0,6,46:2:0:0:2,0,0,0:2,0:0,17,74:17
+KB704451    4157909 .   T   G   278.0   .   DP=295;VDB=0.184881;SGB=22.7413;RPB=0.646301;MQB=0.998034;MQSB=0.200514;BQB=0.321842;MQ0F=0;DPR=247,15;ICB=0.00558284;HOB=0.00255102;AC=4;AN=112;DP4=218,29,15,1;MQ=59;SNP;VARTYPE=SNP  GT:PL:DP:DV:SP:DP4:DPR:GP:GQ    0/0:0,87,255:29:0:0:24,5,0,0:29,0:0,97,282:97   0/0:0,27,183:9:0:0:9,0,0,0:9,0:0,37,210:37  0/1:156,0,167:19:8:0:11,0,7,1:11,8:145,0,183:127    0/1:75,0,107:5:2:0:2,1,2,0:3,2:64,0,123:64  0/0:0,27,191:9:0:0:8,1,0,0:9,0:0,37,218:37  0/0:0,6,80:2:0:0:1,1,0,0:2,0:0,16,107:16    0/0:0,12,119:4:0:0:3,1,0,0:4,0:0,22,146:22  0/0:0,3,34:1:0:0:1,0,0,0:1,0:0,13,61:13 0/0:0,15,126:5:0:0:5,0,0,0:5,0:0,25,153:25  0/0:0,12,132:4:0:0:2,2,0,0:4,0:0,22,159:22  0/0:0,12,133:4:0:0:4,0,0,0:4,0:0,22,160:22  0/0:0,6,67:2:0:0:2,0,0,0:2,0:0,16,94:16 0/1:79,9,0:3:3:0:0,0,3,0:0,3:60,0,8:8   0/0:0,21,230:7:0:0:6,1,0,0:7,0:0,31,257:31  0/0:0,18,144:6:0:0:6,0,0,0:6,0:0,28,171:28  ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,53:2:0:0:2,0,0,0:2,0:0,16,80:16 0/1:59,0,64:5:2:0:3,0,2,0:3,2:48,0,80:48    0/0:0,33,180:11:0:0:11,0,0,0:11,0:0,43,207:43   0/0:0,12,110:4:0:0:4,0,0,0:4,0:0,22,137:22  0/0:0,24,190:8:0:0:7,1,0,0:8,0:0,34,217:34  0/0:0,12,110:4:0:0:4,0,0,0:4,0:0,22,137:22  0/0:0,15,164:5:0:0:3,2,0,0:5,0:0,25,191:25  0/0:0,24,161:8:0:0:8,0,0,0:8,0:0,34,188:34  0/0:0,3,32:1:0:0:0,1,0,0:1,0:0,13,59:13 0/0:0,6,63:2:0:0:2,0,0,0:2,0:0,16,90:16 0/0:0,6,65:2:0:0:2,0,0,0:2,0:0,16,92:16 0/0:0,15,121:5:0:0:5,0,0,0:5,0:0,25,148:25  0/0:0,30,174:10:0:0:10,0,0,0:10,0:0,40,201:40   0/0:0,3,34:1:0:0:1,0,0,0:1,0:0,13,61:13 0/0:0,6,63:2:0:0:2,0,0,0:2,0:0,16,90:16 0/0:0,21,164:7:0:0:5,2,0,0:7,0:0,31,191:31  0/0:0,3,37:1:0:0:1,0,0,0:1,0:0,13,64:13 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,89:2:0:0:2,0,0,0:2,0:0,16,116:16    0/0:0,12,128:4:0:0:2,2,0,0:4,0:0,22,155:22  0/0:0,9,94:3:0:0:1,2,0,0:3,0:0,19,121:19    0/0:0,6,63:2:0:0:1,1,0,0:2,0:0,16,90:16 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,56:2:0:0:2,0,0,0:2,0:0,16,83:16 
0/0:0,3,34:1:0:0:1,0,0,0:1,0:0,13,61:13 0/0:0,36,193:12:0:0:11,1,0,0:12,0:0,46,220:46   0/0:0,12,108:4:0:0:4,0,0,0:4,0:0,22,135:22  ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,18,168:6:0:0:6,0,0,0:6,0:0,28,195:28  ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,3,31:1:0:0:0,1,0,0:1,0:0,13,58:13 0/0:0,6,47:2:0:0:2,0,0,0:2,0:0,16,74:16 0/0:8,11,65:2:1:0:1,0,1,0:1,0:0,13,84:13    0/0:0,6,64:2:0:0:1,1,0,0:2,0:0,16,91:16 0/0:0,3,34:1:0:0:1,0,0,0:1,0:0,13,61:13 0/0:0,27,177:9:0:0:9,0,0,0:9,0:0,37,204:37  0/0:0,6,50:2:0:0:2,0,0,0:2,0:0,16,77:16 0/0:0,12,101:4:0:0:4,0,0,0:4,0:0,22,128:22  0/0:0,6,65:2:0:0:1,1,0,0:2,0:0,16,92:16 0/0:0,12,100:4:0:0:3,1,0,0:4,0:0,22,127:22  0/0:0,3,31:1:0:0:1,0,0,0:1,0:0,13,58:13 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,9,84:3:0:0:3,0,0,0:3,0:0,19,111:19    0/0:0,3,32:1:0:0:1,0,0,0:1,0:0,13,59:13 0/0:0,12,104:4:0:0:4,0,0,0:4,0:0,22,131:22  0/0:0,6,66:2:0:0:1,1,0,0:2,0:0,16,93:16 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0
+KB704451    4157927 .   G   A   4.88727 .   DP=334;VDB=0.38;SGB=3.29913;RPB=0.454248;MQB=0.970588;MQSB=0.546099;BQB=0.215686;MQ0F=0;DPR=306,2;ICB=0.000310486;HOB=0.000153894;AC=1;AN=114;DP4=265,41,2,0;MQ=59;SNP;VARTYPE=SNP  GT:PL:DP:DV:SP:DP4:DPR:GP:GQ    0/0:0,105,255:35:0:0:31,4,0,0:35,0:0,122,295:122    0/0:0,45,255:15:0:0:14,1,0,0:15,0:0,62,295:62   0/0:0,60,255:20:0:0:17,3,0,0:20,0:0,77,295:77   0/0:0,21,242:7:0:0:6,1,0,0:7,0:0,38,282:38  0/0:0,24,207:8:0:0:6,2,0,0:8,0:0,41,247:41  0/0:0,6,71:2:0:0:2,0,0,0:2,0:0,23,112:23    0/0:0,24,215:8:0:0:6,2,0,0:8,0:0,41,255:41  0/0:0,6,70:2:0:0:1,1,0,0:2,0:0,23,111:23    0/0:0,30,191:10:0:0:10,0,0,0:10,0:0,47,231:47   0/0:0,15,163:5:0:0:3,2,0,0:5,0:0,32,203:32  0/0:0,15,151:5:0:0:5,0,0,0:5,0:0,32,191:32  0/0:0,6,70:2:0:0:2,0,0,0:2,0:0,23,111:23    0/0:0,12,102:4:0:0:4,0,0,0:4,0:0,29,142:29  0/0:0,24,255:8:0:0:6,2,0,0:8,0:0,41,295:41  0/0:0,21,189:7:0:0:7,0,0,0:7,0:0,38,229:38  0/0:0,3,35:1:0:0:0,1,0,0:1,0:0,20,76:20 0/0:0,3,40:1:0:0:1,0,0,0:1,0:0,20,81:20 0/0:0,12,126:4:0:0:3,1,0,0:4,0:0,29,166:29  0/0:0,39,255:13:0:0:12,1,0,0:13,0:0,56,295:56   0/0:0,21,206:7:0:0:6,1,0,0:7,0:0,38,246:38  0/0:0,30,238:10:0:0:8,2,0,0:10,0:0,47,278:47    0/0:0,18,145:6:0:0:6,0,0,0:6,0:0,35,185:35  0/0:0,24,244:8:0:0:6,2,0,0:8,0:0,41,284:41  0/0:0,24,195:8:0:0:7,1,0,0:8,0:0,41,235:41  0/0:0,3,27:1:0:0:0,1,0,0:1,0:0,20,68:20 0/0:0,6,62:2:0:0:2,0,0,0:2,0:0,23,103:23    0/0:0,6,64:2:0:0:2,0,0,0:2,0:0,23,105:23    0/0:0,15,123:5:0:0:5,0,0,0:5,0:0,32,163:32  0/0:0,33,184:11:0:0:11,0,0,0:11,0:0,50,224:50   0/0:0,3,35:1:0:0:1,0,0,0:1,0:0,20,76:20 0/0:0,3,35:1:0:0:1,0,0,0:1,0:0,20,76:20 0/0:0,18,165:6:0:0:4,2,0,0:6,0:0,35,205:35  0/0:0,3,38:1:0:0:1,0,0,0:1,0:0,20,79:20 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,9,117:3:0:0:3,0,0,0:3,0:0,26,157:26   0/0:0,12,121:4:0:0:2,2,0,0:4,0:0,29,161:29  0/0:0,9,95:3:0:0:1,2,0,0:3,0:0,26,135:26    0/0:0,3,41:1:0:0:1,0,0,0:1,0:0,20,82:20 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 
0/0:0,6,60:2:0:0:2,0,0,0:2,0:0,23,101:23    0/0:0,3,25:1:0:0:1,0,0,0:1,0:0,20,66:20 0/0:0,36,213:12:0:0:11,1,0,0:12,0:0,53,253:53   0/0:0,15,152:5:0:0:4,1,0,0:5,0:0,32,192:32  ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,21,215:7:0:0:6,1,0,0:7,0:0,38,255:38  ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,3,31:1:0:0:0,1,0,0:1,0:0,20,72:20 0/0:0,6,60:2:0:0:2,0,0,0:2,0:0,23,101:23    0/0:0,9,101:3:0:0:3,0,0,0:3,0:0,26,141:26   ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,60:2:0:0:2,0,0,0:2,0:0,23,101:23    0/0:0,27,179:9:0:0:9,0,0,0:9,0:0,44,219:44  0/0:0,9,92:3:0:0:3,0,0,0:3,0:0,26,132:26    0/0:0,12,112:4:0:0:4,0,0,0:4,0:0,29,152:29  0/0:0,6,58:2:0:0:1,1,0,0:2,0:0,23,99:23 0/0:0,15,123:5:0:0:4,1,0,0:5,0:0,32,163:32  0/0:0,3,25:1:0:0:1,0,0,0:1,0:0,20,66:20 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,9,78:3:0:0:3,0,0,0:3,0:0,26,118:26    0/0:0,3,34:1:0:0:1,0,0,0:1,0:0,20,75:20 0/1:47,0,51:3:2:0:1,0,2,0:1,2:29,0,74:29    0/0:0,9,80:3:0:0:2,1,0,0:3,0:0,26,120:26    0/0:0,6,53:2:0:0:2,0,0,0:2,0:0,23,94:23
+KB704451    4157938 .   ATTT    ATTTT   650.0   .   INDEL;IDV=18;IMF=0.428571;DP=361;VDB=0.773794;SGB=32.6744;MQSB=0.993251;MQ0F=0.00831025;DPR=115,60;ICB=0.929833;HOB=0.0258;AC=23;AN=100;DP4=98,17,48,12;MQ=59;INS;VARTYPE=INS   GT:PL:DP:DV:SP:DP4:DPR:GP:GQ    0/1:124,0,7:19:16:0:3,0,13,3:3,16:123,0,13:13   0/1:14,3,0:1:1:0:0,0,1,0:0,1:12,1,5:4   0/0:0,9,55:3:0:0:3,0,0,0:3,0:0,9,62:9   0/0:0,21,146:7:0:0:5,2,0,0:7,0:0,21,153:21  0/0:0,18,126:6:0:0:4,2,0,0:6,0:0,18,133:18  0/0:0,6,56:2:0:0:2,0,0,0:2,0:0,7,63:7   0/0:0,9,87:3:0:0:2,1,0,0:3,0:0,9,94:9   1/1:46,9,0:3:3:0:0,0,1,2:0,3:40,4,1:4   0/0:0,27,155:9:0:0:8,1,0,0:9,0:0,27,162:27  ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,57:2:0:0:2,0,0,0:2,0:0,7,64:7   0/1:24,3,0:1:1:0:0,0,1,0:0,1:22,1,5:5   0/0:0,3,32:1:0:0:1,0,0,0:1,0:1,5,40:5   0/0:0,9,84:3:0:0:2,1,0,0:3,0:0,9,91:9   0/0:0,15,111:5:0:0:5,0,0,0:5,0:0,15,118:15  ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/1:32,6,0:2:2:0:0,0,1,1:0,2:28,2,3:3   0/0:0,6,55:2:0:0:2,0,0,0:2,0:0,7,62:7   0/1:63,0,61:7:4:0:3,0,3,1:3,4:62,0,67:61    0/0:0,24,154:8:0:0:7,1,0,0:8,0:0,24,161:24  0/1:16,0,94:7:2:0:5,0,2,0:5,2:15,0,100:15   0/1:61,0,101:9:4:0:4,1,3,1:5,4:60,0,107:60  0/0:0,3,31:1:0:0:1,0,0,0:1,0:1,5,39:5   0/1:13,3,0:1:1:0:0,0,0,1:0,1:11,1,5:4   0/1:16,0,47:3:1:0:2,0,1,0:2,1:15,0,53:15    0/0:0,3,32:1:0:0:1,0,0,0:1,0:1,5,40:5   0/1:46,0,57:5:2:0:3,0,2,0:3,2:45,0,63:45    0/1:50,0,12:6:5:0:1,0,5,0:1,5:49,0,18:18    0/0:0,3,30:1:0:0:1,0,0,0:1,0:1,5,38:5   0/0:0,3,4:1:0:0:1,0,0,0:1,0:1,5,12:4    0/1:18,0,98:5:1:0:2,2,1,0:4,1:17,0,104:17   ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/1:15,0,26:2:1:0:1,0,1,0:1,1:14,0,32:14    0/1:20,0,26:2:1:0:1,0,0,1:1,1:19,0,32:19    0/0:0,6,60:2:0:0:1,1,0,0:2,0:0,7,67:7   0/0:0,3,32:1:0:0:1,0,0,0:1,0:1,5,40:5   ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/1:33,3,0:1:1:0:0,0,1,0:0,1:31,1,5:5   0/1:82,6,0:7:6:0:1,0,5,1:1,6:78,2,3:3   
0/0:0,15,126:5:0:0:4,1,0,0:5,0:0,15,133:15  ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/1:34,6,0:2:2:0:0,0,2,0:0,2:30,2,3:3   ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,50:2:0:0:1,1,0,0:2,0:0,7,57:7   0/0:0,3,28:1:0:0:1,0,0,0:1,0:1,5,36:5   0/0:0,6,57:2:0:0:2,0,0,0:2,0:0,7,64:7   0/0:0,18,124:6:0:0:5,1,0,0:6,0:0,18,131:18  0/1:53,6,0:2:2:0:0,0,2,0:0,2:49,2,3:3   0/0:0,12,96:4:0:0:4,0,0,0:4,0:0,12,103:12   0/1:25,3,0:1:1:0:0,0,1,0:0,1:23,1,5:5   1/1:61,9,0:3:3:0:0,0,2,1:0,3:55,4,1:4   ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,3,31:1:0:0:0,1,0,0:1,0:1,5,39:5   0/0:0,6,56:2:0:0:2,0,0,0:2,0:0,7,63:7   0/0:0,3,29:1:0:0:1,0,0,0:1,0:1,5,37:5   0/0:0,3,32:1:0:0:1,0,0,0:1,0:1,5,40:5   0/0:0,9,87:3:0:0:2,1,0,0:3,0:0,9,94:9   ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0
+KB704451    4157940 .   TTGTGTGTGTGTGT  TTGTGTGTGTGTGTGTGT,TTTCTGTGTGTGTGTGT    999.0   .   INDEL;IDV=7;IMF=0.5;DP=366;VDB=0.0431342;SGB=14.7456;MQSB=0.996953;MQ0F=0.010929;DPR=86,41,8;ICB=0.963728;HOB=0.02;AC=21,6;AN=90;DP4=70,16,39,10;MQ=58;INS;VARTYPE=INS,INS  GT:PL:DP:DV:SP:DP4:DPR:GP:GQ    0/2:60,60,60,3,3,0:1:1:0:0,0,1,0:0,0,1:54,55,61,2,5,9:4 1/1:255,18,0,255,18,255:6:6:0:0,0,3,3:0,6,0:248,11,0,252,19,263:11  0/0:0,6,62,6,62,62:2:0:0:2,0,0,0:2,0,0:1,7,70,11,71,78:6    0/1:9,0,238,27,241,255:7:1:0:4,2,1,0:6,1,0:8,0,245,31,249,270:8 0/0:0,12,185,12,185,185:4:0:0:3,1,0,0:4,0,0:0,12,192,16,193,200:11  0/0:0,6,110,6,110,110:2:0:0:2,0,0,0:2,0,0:1,7,118,11,119,126:6  0/1:1,0,158,10,161,165:4:1:0:2,1,1,0:3,1,0:3,2,167,16,171,182:3 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   0/0:0,24,255,24,255,255:8:0:0:7,1,0,0:8,0,0:0,24,262,28,263,270:23  ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   0/1:37,0,54,40,57,94:2:1:0:1,0,1,0:1,1,0:36,0,60,44,64,108:35   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   0/0:0,3,60,3,60,60:1:0:0:1,0,0,0:1,0,0:2,5,69,9,70,77:4 0/1:117,6,0,117,6,117:2:2:0:0,0,1,1:0,2,0:113,3,3,118,10,128:2  0/0:0,18,237,18,237,237:6:0:0:5,1,0,0:6,0,0:0,18,244,22,245,252:17  ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   0/2:120,120,120,6,6,0:2:2:0:0,0,1,1:0,0,2:111,112,119,2,6,7:3   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   1/1:255,21,0,255,21,255:7:7:0:0,0,5,2:0,7,0:247,14,0,252,22,263:13  0/2:19,31,181,0,157,154:5:1:0:3,1,1,0:4,0,1:14,27,183,0,160,164:14  0/1:120,0,255,141,255,255:10:3:0:6,1,2,1:7,3,0:119,0,261,145,262,269:119    0/0:0,15,212,15,212,212:5:0:0:5,0,0,0:5,0,0:0,15,219,19,220,227:14  0/0:0,15,243,15,243,243:5:0:0:4,1,0,0:5,0,0:0,15,250,19,251,258:14  1/1:40,9,0,40,9,40:3:3:0:0,0,3,0:0,3,0:34,4,2,39,12,50:3    ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   0/0:0,6,110,6,110,110:2:0:0:2,0,0,0:2,0,0:1,7,118,11,119,126:6  0/1:54,0,54,57,57,111:2:1:0:1,0,1,0:1,1,0:53,0,60,61,64,125:51  
0/0:0,9,139,9,139,139:3:0:0:3,0,0,0:3,0,0:0,10,146,14,147,154:8 1/1:243,15,0,243,15,243:5:5:0:0,0,4,1:0,5,0:236,9,0,241,16,251:8    ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   0/1:55,1,0,58,3,57:2:1:0:1,0,1,0:1,1,0:54,1,7,62,11,72:5    0/0:0,9,170,9,170,170:3:0:0:1,2,0,0:3,0,0:0,10,177,14,178,185:8 0/1:60,3,0,60,3,60:1:1:0:0,0,1,0:0,1,0:58,2,5,63,9,73:4 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   0/0:0,3,60,3,60,60:1:0:0:1,0,0,0:1,0,0:2,5,69,9,70,77:4 0/0:0,3,60,3,60,60:1:0:0:1,0,0,0:1,0,0:2,5,69,9,70,77:4 0/1:60,3,0,60,3,60:1:1:0:0,0,0,1:0,1,0:58,2,5,63,9,73:4 0/0:0,3,60,3,60,60:1:0:0:1,0,0,0:1,0,0:2,5,69,9,70,77:4 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   0/1:60,3,0,60,3,60:1:1:0:0,0,1,0:0,1,0:58,2,5,63,9,73:4 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   0/0:0,15,206,15,206,206:5:0:0:4,1,0,0:5,0,0:0,15,213,19,214,221:14  ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   1/2:134,105,96,40,0,34:5:5:0:0,0,5,0:0,3,2:125,97,95,36,0,41:35 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   0/1:58,6,0,58,6,58:2:2:0:0,0,2,0:0,2,0:54,3,3,59,10,69:2    0/0:0,6,102,6,102,102:2:0:0:1,1,0,0:2,0,0:1,7,110,11,111,118:6  0/0:0,3,60,3,60,60:1:0:0:1,0,0,0:1,0,0:2,5,69,9,70,77:4 0/0:0,3,60,3,60,60:1:0:0:1,0,0,0:1,0,0:2,5,69,9,70,77:4 0/1:98,0,88,104,94,191:4:2:0:1,1,2,0:2,2,0:97,0,94,108,101,205:92   0/2:35,35,35,3,3,0:1:1:0:0,0,1,0:0,0,1:29,30,36,2,5,9:4 0/0:0,6,110,6,110,110:2:0:0:2,0,0,0:2,0,0:1,7,118,11,119,126:6  ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   0/2:45,45,45,3,3,0:1:1:0:0,0,1,0:0,0,1:39,40,46,2,5,9:4 0/0:0,3,60,3,60,60:1:0:0:0,1,0,0:1,0,0:2,5,69,9,70,77:4 0/0:0,6,110,6,110,110:2:0:0:2,0,0,0:2,0,0:1,7,118,11,119,126:6  ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   
0/0:0,3,60,3,60,60:1:0:0:1,0,0,0:1,0,0:2,5,69,9,70,77:4 0/0:0,6,120,6,120,120:2:0:0:1,1,0,0:2,0,0:1,7,128,11,129,136:6  ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0
diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index 2017653..bb4ce37 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -114,6 +114,26 @@ def test_vcf_4_1_bnd(self):
                 print(c)
                 assert c
 
+    def test_vcf_4_2(self):
+        reader = vcf.Reader(fh('example-4.2.vcf'))
+        self.assertEqual(reader.metadata['fileformat'],  'VCFv4.2')
+
+        # If INFO contains no Source and Version keys, they should be None.
+        self.assertEqual(reader.infos['DP'].source, None)
+        self.assertEqual(reader.infos['DP'].version, None)
+
+        # According to spec, INFO Version key is required to be double quoted,
+        # but at least SAMtools 1.0 does not quote it. So we want to be
+        # forgiving here.
+        self.assertEqual(reader.infos['VDB'].source, None)
+        self.assertEqual(reader.infos['VDB'].version, '3')
+
+        # test we can walk the file at least
+        for r in reader:
+            for c in r:
+                assert c
+
+
 class TestGatkOutput(unittest.TestCase):
 
     filename = 'gatk.vcf'

From f6e955ff2c47be3d41a29297734ebc7ad09848c5 Mon Sep 17 00:00:00 2001
From: Chris Lasher <chris.lasher@gmail.com>
Date: Mon, 8 Sep 2014 23:12:36 -0700
Subject: [PATCH 111/168] Bugfix: SNP records with N as ALT now noted as SNPs.

The VCF 4.0 and newer specifications say the ALT field is a comma
separated list that includes "base Strings made up of the bases
A,C,G,T,N". Notably, the last case was not handled by `Record.is_snp`,
causing it to erroneously report `False` for records with "N" as the ALT.
---
 vcf/model.py         |  2 +-
 vcf/test/test_vcf.py | 18 ++++++++++++++++++
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/vcf/model.py b/vcf/model.py
index 68281ec..3d787ef 100644
--- a/vcf/model.py
+++ b/vcf/model.py
@@ -376,7 +376,7 @@ def is_snp(self):
         for alt in self.ALT:
             if alt is None or alt.type != "SNV":
                 return False
-            if alt not in ['A', 'C', 'G', 'T']:
+            if alt not in ['A', 'C', 'G', 'T', 'N']:
                 return False
         return True
 
diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index bb4ce37..ecb0ddb 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -561,6 +561,24 @@ def test_is_snp(self):
             elif var.POS == 1234567:
                 self.assertEqual(False, is_snp)
 
+
+    def test_is_snp_for_n_alt(self):
+        record = model._Record(
+                '1',
+                10,
+                'id1',
+                'C',
+                [model._Substitution('N')],
+                None,
+                None,
+                {},
+                None,
+                {},
+                None
+        )
+        self.assertTrue(record.is_snp)
+
+
     def test_is_indel(self):
         reader = vcf.Reader(fh('example-4.0.vcf'))
         for var in reader:

From 0a993e13007cc8adbaf17bd61612ccaafd4f983d Mon Sep 17 00:00:00 2001
From: Chris Lasher <chris.lasher@gmail.com>
Date: Mon, 8 Sep 2014 23:17:51 -0700
Subject: [PATCH 112/168] Run tests for Python 3.4.

---
 .travis.yml | 1 +
 tox.ini     | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 1e1b142..1fdfd54 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -5,6 +5,7 @@ python:
   - "2.7"
   - "3.2"
   - "3.3"
+  - "3.4"
   - "pypy"
 install:
   - "if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install -r requirements/python2.6-requirements.txt; elif [[ $TRAVIS_PYTHON_VERSION == 'pypy' ]]; then pip install -r requirements/pypy-requirements.txt; else pip install -r requirements/common-requirements.txt; fi"
diff --git a/tox.ini b/tox.ini
index 953a9dc..64a7ab4 100644
--- a/tox.ini
+++ b/tox.ini
@@ -4,7 +4,7 @@
 # and then run "tox" from this directory.
 
 [tox]
-envlist = py26, py27, py32, py33
+envlist = py26, py27, py32, py33, py34
 
 [testenv]
 deps =

From e8a05d9beafb85c2552e934229479b179c18fafe Mon Sep 17 00:00:00 2001
From: Martijn Vermaat <martijn@vermaat.name>
Date: Sat, 13 Sep 2014 09:26:18 +0200
Subject: [PATCH 113/168] Add Python 3.4 trove classifier

---
 setup.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/setup.py b/setup.py
index fb4e512..a266207 100644
--- a/setup.py
+++ b/setup.py
@@ -73,6 +73,7 @@
         'Programming Language :: Python :: 3',
         'Programming Language :: Python :: 3.2',
         'Programming Language :: Python :: 3.3',
+        'Programming Language :: Python :: 3.4',
         'Topic :: Scientific/Engineering :: Bio-Informatics',
       ],
     keywords='bioinformatics',

From 82d8288eeaa812992671f3490a5103694eaad738 Mon Sep 17 00:00:00 2001
From: awenger <aaron.m.wenger@gmail.com>
Date: Tue, 16 Sep 2014 17:11:46 -0700
Subject: [PATCH 114/168] Add test cases for uncalled genotypes support

* Remember the ploidity of uncalled genotypes such that
  the sample genotypes written by PyVCF.Writer match the
  sample genotypes read by PyVCF.Reader.
* For uncalled _Calls, gt_nums and gt_bases are None;
  gt_alleles is a list of "None" with a length of _Call.ploidity.
---
 vcf/test/test_vcf.py            | 62 +++++++++++++++++++++++++++++++++
 vcf/test/uncalled_genotypes.vcf |  7 ++++
 2 files changed, 69 insertions(+)
 create mode 100644 vcf/test/uncalled_genotypes.vcf

diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index ecb0ddb..4d219d8 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -1379,6 +1379,67 @@ def test_meta(self):
         self.assertEqual(reader.metadata['GATKCommandLine'][1]['CommandLineOptions'], '"analysis_type=VariantAnnotator annotation=[HomopolymerRun, VariantType, TandemRepeatAnnotator]"')
 
 
+
+class TestUncalledGenotypes(unittest.TestCase):
+    """Test the handling of uncalled (., ./.) sample genotypes."""
+
+    def test_read_uncalled(self):
+        """Test that uncalled genotypes are properly read into
+        gt_nums, gt_bases, ploidity, and gt_alleles properties
+        of _Call objects.  For uncalled _Call objects:
+
+        - gt_nums should be None
+        - gt_bases should be None
+        - ploidity should match the input ploidity
+        - gt_alleles should be a list of None's with length
+          matching the ploidity"""
+
+        reader = vcf.Reader(fh('uncalled_genotypes.vcf'))
+        for var in reader:
+            gt_bases = [s.gt_bases for s in var.samples]
+            gt_nums = [s.gt_nums for s in var.samples]
+            ploidity = [s.ploidity for s in var.samples]
+            gt_alleles = [s.gt_alleles for s in var.samples]
+
+            if var.POS == 14370:
+                self.assertEqual(['0|0', None, '1/1'], gt_nums)
+                self.assertEqual(['G|G', None, 'A/A'], gt_bases)
+                self.assertEqual([2,2,2], ploidity)
+                self.assertEqual([['0','0'], [None,None], ['1','1']], gt_alleles)
+            elif var.POS == 17330:
+                self.assertEqual([None, '0|1', '0/0'], gt_nums)
+                self.assertEqual([None, 'T|A', 'T/T'], gt_bases)
+                self.assertEqual([3,2,2], ploidity)
+                self.assertEqual([[None,None,None], ['0','1'], ['0','0']], gt_alleles)
+            elif var.POS == 1234567:
+                self.assertEqual(['0/1', '0/2', None], gt_nums)
+                self.assertEqual(['GTC/G', 'GTC/GTCT', None], gt_bases)
+                self.assertEqual([2,2,1], ploidity)
+                self.assertEqual([['0','1'], ['0','2'], [None]], gt_alleles)
+
+
+    def test_write_uncalled(self):
+        """Test that uncalled genotypes are written just as
+        they were read in the input file."""
+
+        reader = vcf.Reader(fh('uncalled_genotypes.vcf'))
+
+        # Write all reader records to a stream.
+        out = StringIO()
+        writer = vcf.Writer(out, reader, lineterminator='\n')
+        for record in reader:
+            writer.write_record(record)
+
+
+        # Compare the written stream to the input reader line-by-line.
+        out.seek(0)
+        out_lines = out.getvalue().split('\n')
+        in_lines = [l.rstrip('\n') for l in fh('uncalled_genotypes.vcf')]
+        for (in_line, out_line) in zip(in_lines, out_lines):
+            self.assertEqual(in_line,out_line)
+
+
+
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestVcfSpecs))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestGatkOutput))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestFreebayesOutput))
@@ -1404,3 +1465,4 @@ def test_meta(self):
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestRegression))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestUtils))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestGATKMeta))
+suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestUncalledGenotypes))
diff --git a/vcf/test/uncalled_genotypes.vcf b/vcf/test/uncalled_genotypes.vcf
new file mode 100644
index 0000000..2032097
--- /dev/null
+++ b/vcf/test/uncalled_genotypes.vcf
@@ -0,0 +1,7 @@
+##fileformat=VCFv4.2
+##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	AB00001	AB00002	AB00003
+20	14370	rs6054257	G	A	29	PASS	NS=3	GT	0|0	./.	1/1
+20	17330	.	T	A	3	q10	NS=3	GT	././.	0|1	0/0
+20	1234567	microsat1	GTC	G,GTCT	50	PASS	NS=3	GT	0/1	0/2	.

From 6f7b3d9ca59633293de2879af42ab32a4e5a36aa Mon Sep 17 00:00:00 2001
From: awenger <aaron.m.wenger@gmail.com>
Date: Tue, 16 Sep 2014 17:48:53 -0700
Subject: [PATCH 115/168] Close file handles in TestUncalledGenotypes tests

Warnings about open file handles muddle the output of unit tests
and are a potentially confusing factor to those interpreting
the tests.
---
 vcf/test/test_vcf.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index 4d219d8..ce26863 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -1416,6 +1416,7 @@ def test_read_uncalled(self):
                 self.assertEqual(['GTC/G', 'GTC/GTCT', None], gt_bases)
                 self.assertEqual([2,2,1], ploidity)
                 self.assertEqual([['0','1'], ['0','2'], [None]], gt_alleles)
+        reader._reader.close()
 
 
     def test_write_uncalled(self):
@@ -1429,12 +1430,15 @@ def test_write_uncalled(self):
         writer = vcf.Writer(out, reader, lineterminator='\n')
         for record in reader:
             writer.write_record(record)
+        reader._reader.close()
 
 
         # Compare the written stream to the input reader line-by-line.
         out.seek(0)
         out_lines = out.getvalue().split('\n')
-        in_lines = [l.rstrip('\n') for l in fh('uncalled_genotypes.vcf')]
+        in_file = fh('uncalled_genotypes.vcf')
+        in_lines = [l.rstrip('\n') for l in in_file]
+        in_file.close()
         for (in_line, out_line) in zip(in_lines, out_lines):
             self.assertEqual(in_line,out_line)
 

From 14e4837511600d20a38f9e49f70a19ddc1abeb76 Mon Sep 17 00:00:00 2001
From: awenger <aaron.m.wenger@gmail.com>
Date: Tue, 16 Sep 2014 18:43:05 -0700
Subject: [PATCH 116/168] Add support for uncalled genotypes

---
 vcf/cparse.pyx |  5 ++++-
 vcf/model.py   | 26 +++++++++++++-------------
 vcf/parser.py  |  5 ++++-
 3 files changed, 21 insertions(+), 15 deletions(-)

diff --git a/vcf/cparse.pyx b/vcf/cparse.pyx
index 682e6a7..a3cb4b3 100644
--- a/vcf/cparse.pyx
+++ b/vcf/cparse.pyx
@@ -36,7 +36,10 @@ def parse_samples(
             vals = sampvals[j]
 
             # short circuit the most common
-            if vals == '.' or vals == './.':
+            if samp_fmt._fields[j] == 'GT':
+                sampdat[j] = vals
+                continue
+            elif vals == '.':
                 sampdat[j] = None
                 continue
 
diff --git a/vcf/model.py b/vcf/model.py
index 3d787ef..c1d5710 100644
--- a/vcf/model.py
+++ b/vcf/model.py
@@ -1,17 +1,19 @@
 from abc import ABCMeta, abstractmethod
 import collections
 import sys
+import re
 
 try:
     from collections import Counter
 except ImportError:
     from counter import Counter
 
+allele_delimiter = re.compile(r'''[|/]''') # to split a genotype into alleles
 
 class _Call(object):
     """ A genotype call, a cell entry in a VCF file"""
 
-    __slots__ = ['site', 'sample', 'data', 'gt_nums', 'called']
+    __slots__ = ['site', 'sample', 'data', 'gt_nums', 'gt_alleles', 'called', 'ploidity']
 
     def __init__(self, site, sample, data):
         #: The ``_Record`` for this ``_Call``
@@ -20,14 +22,18 @@ def __init__(self, site, sample, data):
         self.sample = sample
         #: Dictionary of data from the VCF file
         self.data = data
-        try:
-            self.gt_nums = self.data.GT
-            #: True if the GT is not ./.
-            self.called = self.gt_nums is not None
-        except AttributeError:
-            self.gt_nums = None
+
+        if hasattr(self.data, 'GT'):
+            self.gt_alleles = [(al if al != '.' else None) for al in allele_delimiter.split(self.data.GT)]
+            self.ploidity = len(self.gt_alleles)
+            self.called = all([al != None for al in self.gt_alleles])
+            self.gt_nums = self.data.GT if self.called else None
+        else:
             #62 a call without a genotype is not defined as called or not
+            self.gt_alleles = None
+            self.ploidity = None
             self.called = None
+            self.gt_nums = None
 
     def __repr__(self):
         return "Call(sample=%s, %s)" % (self.sample, str(self.data))
@@ -50,12 +56,6 @@ def __setstate__(self, state):
     def gt_phase_char(self):
         return "/" if not self.phased else "|"
 
-    @property
-    def gt_alleles(self):
-        '''The numbers of the alleles called at a given sample'''
-        # grab the numeric alleles of the gt string; tokenize by phasing
-        return self.gt_nums.split(self.gt_phase_char())
-
     @property
     def gt_bases(self):
         '''The actual genotype alleles.
diff --git a/vcf/parser.py b/vcf/parser.py
index 6d668af..1284ddf 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -459,7 +459,10 @@ def _parse_samples(self, samples, samp_fmt, site):
             for i, vals in enumerate(sample.split(':')):
 
                 # short circuit the most common
-                if vals == '.' or vals == './.':
+                if samp_fmt._fields[i] == 'GT':
+                    sampdat[i] = vals
+                    continue
+                elif vals == ".":
                     sampdat[i] = None
                     continue
 

From 80a638cf0a7df39fa3765797f281428d929fbc10 Mon Sep 17 00:00:00 2001
From: awenger <aaron.m.wenger@gmail.com>
Date: Tue, 16 Sep 2014 18:44:09 -0700
Subject: [PATCH 117/168] Simplify _format_sample logic

The sample.data.GT attribute is no longer set to None for
uncalled calls, which means that _format_sample can now
rely on obtaining the original sample genotype.
---
 vcf/parser.py | 26 ++++++--------------------
 1 file changed, 6 insertions(+), 20 deletions(-)

diff --git a/vcf/parser.py b/vcf/parser.py
index 1284ddf..3c36c31 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -726,28 +726,14 @@ def order_key(field):
                         sorted(info, key=order_key))
 
     def _format_sample(self, fmt, sample):
-        try:
-            # Try to get the GT value first.
-            gt = getattr(sample.data, 'GT')
-            # PyVCF stores './.' GT values as None, so we need to revert it back
-            # to './.' when writing.
-            if gt is None:
-                gt = './.'
-        except AttributeError:
-            # Failing that, try to check whether 'GT' is specified in the FORMAT
-            # field. If yes, use the recommended empty value ('./.')
-            if 'GT' in fmt:
-                gt = './.'
-            # Otherwise use an empty string as the value
-            else:
-                gt = ''
-        # If gt is an empty string (i.e. not stored), write all other data
+        if hasattr(sample.data, 'GT'):
+            gt = sample.data.GT
+        else:
+            gt = './.' if 'GT' in fmt else ''
+
         if not gt:
             return ':'.join([self._stringify(x) for x in sample.data])
-        # Otherwise use the GT values from above and combine it with the rest of
-        # the data.
-        # Note that this follows the VCF spec, where GT is always the first
-        # item whenever it is present.
+        # Following the VCF spec, GT is always the first item whenever it is present.
         else:
             return ':'.join([gt] + [self._stringify(x) for x in sample.data[1:]])
 

From 28725da42749ff5d3fa1f5e6ecaa636fe9493f86 Mon Sep 17 00:00:00 2001
From: Martijn Vermaat <martijn@vermaat.name>
Date: Fri, 10 Oct 2014 20:38:02 +0200
Subject: [PATCH 118/168] Tolerate equals sign in INFO field value

Fixes #181
---
 vcf/parser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vcf/parser.py b/vcf/parser.py
index 3c36c31..411d94d 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -367,7 +367,7 @@ def _parse_info(self, info_str):
         retdict = {}
 
         for entry in entries:
-            entry = entry.split('=')
+            entry = entry.split('=', 1)
             ID = entry[0]
             try:
                 entry_type = self.infos[ID].type

From 2fceb0c2aaa4e864523883c021d81f89d711ab15 Mon Sep 17 00:00:00 2001
From: David Caplan <dcaplan@gmail.com>
Date: Fri, 24 Oct 2014 10:50:00 -0400
Subject: [PATCH 119/168] fix double quoting issue when writing VCFs

---
 vcf/parser.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/vcf/parser.py b/vcf/parser.py
index 411d94d..1f72b64 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -634,7 +634,9 @@ class Writer(object):
     counts = dict((v,k) for k,v in field_counts.iteritems())
 
     def __init__(self, stream, template, lineterminator="\n"):
-        self.writer = csv.writer(stream, delimiter="\t", lineterminator=lineterminator)
+        self.writer = csv.writer(stream, delimiter="\t",
+                                 lineterminator=lineterminator,
+                                 quotechar='', quoting=csv.QUOTE_NONE)
         self.template = template
         self.stream = stream
 

From 35ebae14f1f53b75b01ab212b4083598f2edc10a Mon Sep 17 00:00:00 2001
From: Martijn Vermaat <martijn@vermaat.name>
Date: Mon, 10 Nov 2014 16:27:21 +0100
Subject: [PATCH 120/168] Blacklist pysam 0.8.0 in unit tests (fails on Python
 3)

The issue in 0.8.0 seems to be fixed in 0.8.1, so it's now safe to
just blacklist 0.8.0 specifically.

See #175
---
 requirements/common-requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements/common-requirements.txt b/requirements/common-requirements.txt
index 7bbf965..876b75e 100644
--- a/requirements/common-requirements.txt
+++ b/requirements/common-requirements.txt
@@ -1,3 +1,3 @@
 cython
-pysam==0.7.8
+pysam!=0.8.0
 setuptools

From 2c8d94fc4b9a649df0ddbba4543051e7af40a7ad Mon Sep 17 00:00:00 2001
From: chapmanb <chapmanb@50mail.com>
Date: Sun, 15 Feb 2015 19:38:21 -0500
Subject: [PATCH 121/168] Support ##contig headers with only ID attributes.
 Generated by bcftools 1.2 when inputs have no ##contig information

---
 vcf/parser.py              | 18 +++++++++---------
 vcf/test/contig_idonly.vcf |  5 +++++
 vcf/test/test_vcf.py       | 11 +++++++++++
 3 files changed, 25 insertions(+), 9 deletions(-)
 create mode 100644 vcf/test/contig_idonly.vcf

diff --git a/vcf/parser.py b/vcf/parser.py
index 1f72b64..bc51ee9 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -104,16 +104,17 @@ def __init__(self):
             Description="(?P<desc>.*)"
             >''', re.VERBOSE)
         self.contig_pattern = re.compile(r'''\#\#contig=<
-            ID=(?P<id>[^,]+),
-            .*
-            length=(?P<length>-?\d+)
+            ID=(?P<id>[^>,]+)
+            (,.*length=(?P<length>-?\d+))?
             .*
             >''', re.VERBOSE)
         self.meta_pattern = re.compile(r'''##(?P<key>.+?)=(?P<val>.+)''')
 
     def vcf_field_count(self, num_str):
         """Cast vcf header numbers to integer or None"""
-        if num_str not in field_counts:
+        if num_str is None:
+            return None
+        elif num_str not in field_counts:
             # Fixed, specified number
             return int(num_str)
         else:
@@ -176,14 +177,10 @@ def read_contig(self, contig_string):
         if not match:
             raise SyntaxError(
                 "One of the contig lines is malformed: %s" % contig_string)
-
         length = self.vcf_field_count(match.group('length'))
-
         contig = _Contig(match.group('id'), length)
-
         return (match.group('id'), contig)
 
-
     def read_meta_hash(self, meta_string):
         items = re.split("[<>]", meta_string)
         # Removing initial hash marks and final equal sign
@@ -668,7 +665,10 @@ def __init__(self, stream, template, lineterminator="\n"):
         for line in template.alts.itervalues():
             stream.write(two.format(key="ALT", *line))
         for line in template.contigs.itervalues():
-            stream.write('##contig=<ID={0},length={1}>\n'.format(*line))
+            if line.length:
+                stream.write('##contig=<ID={0},length={1}>\n'.format(*line))
+            else:
+                stream.write('##contig=<ID={0}>\n'.format(*line))
 
         self._write_header()
 
diff --git a/vcf/test/contig_idonly.vcf b/vcf/test/contig_idonly.vcf
new file mode 100644
index 0000000..5e5a6ad
--- /dev/null
+++ b/vcf/test/contig_idonly.vcf
@@ -0,0 +1,5 @@
+##fileformat=VCFv4.2
+##contig=<ID=1>
+##contig=<ID=2,length=2000>
+##contig=<ID=3,assembly=b37,length=3000>
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index ce26863..3782d12 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -133,6 +133,17 @@ def test_vcf_4_2(self):
             for c in r:
                 assert c
 
+    def test_contig_idonly(self):
+        """Test VCF inputs with ##contig inputs containing only IDs. produced by bcftools 1.2+
+        """
+        reader = vcf.Reader(fh("contig_idonly.vcf"))
+        for cid, contig in reader.contigs.items():
+            if cid == "1":
+                assert contig.length is None
+            elif cid == "2":
+                assert contig.length == 2000
+            elif cid == "3":
+                assert contig.length == 3000
 
 class TestGatkOutput(unittest.TestCase):
 

From 5864f83bdbdc431fba543daf65a1a9604ef028fb Mon Sep 17 00:00:00 2001
From: Martijn Vermaat <martijn@vermaat.name>
Date: Sat, 14 Mar 2015 22:17:22 +0100
Subject: [PATCH 122/168] Allow for whitespace after commas in metadata lines

Fixes #192
---
 vcf/parser.py                    | 20 ++++++------
 vcf/test/metadata-whitespace.vcf | 56 ++++++++++++++++++++++++++++++++
 vcf/test/test_vcf.py             | 22 +++++++++++++
 3 files changed, 88 insertions(+), 10 deletions(-)
 create mode 100644 vcf/test/metadata-whitespace.vcf

diff --git a/vcf/parser.py b/vcf/parser.py
index bc51ee9..2124798 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -82,25 +82,25 @@ class _vcf_metadata_parser(object):
     def __init__(self):
         super(_vcf_metadata_parser, self).__init__()
         self.info_pattern = re.compile(r'''\#\#INFO=<
-            ID=(?P<id>[^,]+),
-            Number=(?P<number>-?\d+|\.|[AGR]),
-            Type=(?P<type>Integer|Float|Flag|Character|String),
+            ID=(?P<id>[^,]+),\s*
+            Number=(?P<number>-?\d+|\.|[AGR]),\s*
+            Type=(?P<type>Integer|Float|Flag|Character|String),\s*
             Description="(?P<desc>[^"]*)"
-            (?:,Source="(?P<source>[^"]*)")?
-            (?:,Version="?(?P<version>[^"]*)"?)?
+            (?:,\s*Source="(?P<source>[^"]*)")?
+            (?:,\s*Version="?(?P<version>[^"]*)"?)?
             >''', re.VERBOSE)
         self.filter_pattern = re.compile(r'''\#\#FILTER=<
-            ID=(?P<id>[^,]+),
+            ID=(?P<id>[^,]+),\s*
             Description="(?P<desc>[^"]*)"
             >''', re.VERBOSE)
         self.alt_pattern = re.compile(r'''\#\#ALT=<
-            ID=(?P<id>[^,]+),
+            ID=(?P<id>[^,]+),\s*
             Description="(?P<desc>[^"]*)"
             >''', re.VERBOSE)
         self.format_pattern = re.compile(r'''\#\#FORMAT=<
-            ID=(?P<id>.+),
-            Number=(?P<number>-?\d+|\.|[AGR]),
-            Type=(?P<type>.+),
+            ID=(?P<id>.+),\s*
+            Number=(?P<number>-?\d+|\.|[AGR]),\s*
+            Type=(?P<type>.+),\s*
             Description="(?P<desc>.*)"
             >''', re.VERBOSE)
         self.contig_pattern = re.compile(r'''\#\#contig=<
diff --git a/vcf/test/metadata-whitespace.vcf b/vcf/test/metadata-whitespace.vcf
new file mode 100644
index 0000000..c163f9a
--- /dev/null
+++ b/vcf/test/metadata-whitespace.vcf
@@ -0,0 +1,56 @@
+##fileformat=VCFv4.2
+##FILTER=<ID=PASS, Description="All filters passed">
+##samtoolsVersion=1.0-17-gfaf4dd6+htslib-1.0-11-g830ea73
+##samtoolsCommand=samtools mpileup -u -t DP,DPR,DV,DP4,INFO/DPR,SP -f /data/archive/reference/Anopheles-arabiensis-Dongola_SCAFFOLDS_AaraD1.fa -r KB704451:0004153102-0004172483 huge_list_of_bam_files_removed
+##reference=file:///data/archive/reference/Anopheles-arabiensis-Dongola_SCAFFOLDS_AaraD1.fa
+##contig=<ID=KB704451,length=13998812>
+##ALT=<ID=X,Description="Represents allele(s) other than observed.">
+##INFO=<ID=INDEL, Number=0, Type=Flag, Description="Indicates that the variant is an INDEL.">
+##INFO=<ID=IDV, Number=1, Type=Integer, Description="Maximum number of reads supporting an indel">
+##INFO=<ID=IMF,Number=1,Type=Float,Description="Maximum fraction of reads supporting an indel">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Raw read depth">
+##INFO=<ID=VDB,Number=1,Type=Float,Description="Variant Distance Bias for filtering splice-site artefacts in RNA-seq data (bigger is better)",Version=3>
+##INFO=<ID=RPB,Number=1,Type=Float,Description="Mann-Whitney U test of Read Position Bias (bigger is better)">
+##INFO=<ID=MQB,Number=1,Type=Float,Description="Mann-Whitney U test of Mapping Quality Bias (bigger is better)">
+##INFO=<ID=BQB,Number=1,Type=Float,Description="Mann-Whitney U test of Base Quality Bias (bigger is better)">
+##INFO=<ID=MQSB,Number=1,Type=Float,Description="Mann-Whitney U test of Mapping Quality vs Strand Bias (bigger is better)">
+##INFO=<ID=SGB,Number=1,Type=Float,Description="Segregation based metric.">
+##INFO=<ID=MQ0F,Number=1,Type=Float,Description="Fraction of MQ0 reads (smaller is better)">
+##FORMAT=<ID=PL, Number=G, Type=Integer, Description="List of Phred-scaled genotype likelihoods">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Number of high-quality bases">
+##FORMAT=<ID=DV,Number=1,Type=Integer,Description="Number of high-quality non-reference bases">
+##FORMAT=<ID=DPR,Number=R,Type=Integer,Description="Number of high-quality bases observed for each allele">
+##INFO=<ID=DPR,Number=R,Type=Integer,Description="Number of high-quality bases observed for each allele">
+##FORMAT=<ID=DP4, Number=4,   Type=Integer,   Description="Number of high-quality ref-fwd, ref-reverse, alt-fwd and alt-reverse bases">
+##FORMAT=<ID=SP,Number=1,Type=Integer,Description="Phred-scaled strand bias P-value">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Phred-scaled Genotype Quality">
+##FORMAT=<ID=GP,Number=G,Type=Float,Description="Phred-scaled genotype posterior probabilities">
+##INFO=<ID=ICB,Number=1,Type=Float,Description="Inbreeding Coefficient Binomial test (bigger is better)">
+##INFO=<ID=HOB,Number=1,Type=Float,Description="Bias in the number of HOMs number (smaller is better)">
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes for each ALT allele, in the same order as listed">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=DP4,Number=4,Type=Integer,Description="Number of high-quality ref-forward , ref-reverse, alt-forward and alt-reverse bases">
+##INFO=<ID=MQ,Number=1,Type=Integer,Description="Average mapping quality">
+##bcftools_callVersion=1.0-55-gc661821+htslib-1.0-11-g830ea73
+##bcftools_callCommand=call -m -vM -f GQ,GP
+##SnpSiftVersion="SnpSift 3.6c (build 2014-05-20), by Pablo Cingolani"
+##SnpSiftCmd="SnpSift varType - "
+##INFO=<ID=VARTYPE, Number=A, Type=Flag, Description="Variant types {SNP,MNP,INS,DEL,Mixed}">
+##INFO=<ID=SNP, Number=0, Type=Flag, Description="Variant is a SNP">
+##INFO=<ID=MNP,Number=0,Type=Flag,Description="Variant is an MNP">
+##INFO=<ID=INS,	Number=0,	Type=Flag,	Description="Variant is an insertion">
+##INFO=<ID=DEL,Number=0,Type=Flag,Description="Variant is an deletion">
+##INFO=<ID=MIXED,Number=0,Type=Flag,Description="Variant is mixture of INS/DEL/SNP/MNP">
+##INFO=<ID=HOM,   Number=0,Type=Flag,     Description="Variant is homozygous">
+##INFO=<ID=HET,Number=0,Type=Flag,Description="Variant is heterozygous">
+##INFO=<ID=VARTYPE,Number=A,Type=String,Description="Comma separated list of variant types. One per allele">
+#CHROM  POS ID  REF ALT QUAL    FILTER  INFO    FORMAT  LUPI059 MINE001 OKJ042  LUPI001 LUPI007 LUPI024 LUPI056 LUPI071 LUPI074 LUPI082 MINE040 MINE100 MINE101 MINE105 MINE111 OKJ017  OKJ045  OKJ070  SAGA066 SAGA107 SAGA131 SAGA133 SAGA134 SAGA141 2012L_LUPI_002  2012L_LUPI_015  2012L_LUPI_017  2012L_LUPI_018  2012L_LUPI_035  2012L_LUPI_062  2012L_LUPI_065  2012L_LUPI_077  2012L_LUPI_083  2012L_LUPI_116  2012L_LUPI_013  2012L_LUPI_041  2012L_LUPI_068  2012L_LUPI_096  2012L_LUPI_098  2012L_LUPI_101  2012L_LUPI_103  2012_LUPI_156   2012_LUPI_157   2012_LUPI_161   2012_LUPI_171   2012_LUPI_173   2012_LUPI_180   2012L_LUPI_010  2012L_LUPI_012  2012L_LUPI_021  2012L_LUPI_045  2012L_LUPI_047  2012L_LUPI_060  2012L_LUPI_061  2012L_LUPI_067  2012_LUPI_125   2012_LUPI_129   2012_LUPI_146   2012_LUPI_178   2012_LUPI_211   2012_LUPI_277   2012_LUPI_278   2012_LUPI_279   2012_LUPI_284
+KB704451    4157846 .   N   A,C 167.0   .   DP=10;VDB=1.17174e-06;SGB=1.26353;MQ0F=0;DPR=0,6,4;AC=10,4;AN=14;DP4=0,0,10,0;MQ=60;SNP;VARTYPE=SNP,SNP GT:PL:DP:DV:SP:DP4:DPR:GP:GQ    1/2:74,23,14,57,0,54:4:4:0:0,0,4,0:0,3,1:144,56,16,90,0,57:16   1/2:26,26,26,3,3,0:1:1:0:0,0,1,0:0,0,1:95,58,28,36,2,3:3    ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   1/1:26,3,0,26,3,26:1:1:0:0,0,1,0:0,1,0:96,36,2,60,3,29:3    ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   1/1:26,3,0,26,3,26:1:1:0:0,0,1,0:0,1,0:96,36,2,60,3,29:3    ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   1/2:26,26,26,3,3,0:1:1:0:0,0,1,0:0,0,1:95,58,28,36,2,3:3    ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   1/1:26,3,0,26,3,26:1:1:0:0,0,1,0:0,1,0:96,36,2,60,3,29:3    ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   1/2:26,26,26,3,3,0:1:1:0:0,0,1,0:0,0,1:95,58,28,36,2,3:3    ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   
./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0
+KB704451    4157870 .   T   C   275.0   .   DP=243;VDB=0.00023935;SGB=29.4468;RPB=0.0368658;MQB=0.979612;MQSB=0.268441;BQB=0.99223;MQ0F=0;DPR=213,19;ICB=0.85092;HOB=0.0287274;AC=6;AN=118;DP4=201,12,19,1;MQ=53;SNP;VARTYPE=SNP    GT:PL:DP:DV:SP:DP4:DPR:GP:GQ    0/0:0,66,255:22:0:0:20,2,0,0:22,0:0,75,279:75   0/0:0,12,120:4:0:0:4,0,0,0:4,0:0,21,144:21  1/1:193,36,0:12:12:0:0,0,12,0:0,12:168,20,0:20  0/0:0,9,95:3:0:0:3,0,0,0:3,0:0,18,119:18    0/1:78,0,110:7:3:0:3,1,3,0:4,3:68,0,125:68  0/0:0,3,40:1:0:0:1,0,0,0:1,0:0,12,64:12 0/0:0,6,72:2:0:0:2,0,0,0:2,0:0,15,96:15 0/0:0,9,90:3:0:0:3,0,0,0:3,0:0,18,114:18    0/0:0,12,122:4:0:0:4,0,0,0:4,0:0,21,146:21  0/0:0,9,97:3:0:0:3,0,0,0:3,0:0,18,121:18    0/0:0,15,122:5:0:0:5,0,0,0:5,0:0,24,146:24  0/0:0,6,71:2:0:0:2,0,0,0:2,0:0,15,95:15 0/0:0,6,58:2:0:0:2,0,0,0:2,0:0,15,82:15 0/0:0,18,155:6:0:0:6,0,0,0:6,0:0,27,179:27  0/0:0,3,39:1:0:0:1,0,0,0:1,0:0,12,63:12 0/1:35,3,0:1:1:0:0,0,1,0:0,1:23,0,12:12 0/0:0,9,87:3:0:0:3,0,0,0:3,0:0,18,111:18    0/1:47,0,104:6:2:0:4,0,2,0:4,2:37,0,119:37  0/0:0,21,160:7:0:0:7,0,0,0:7,0:0,30,184:30  0/0:0,6,35:2:0:0:2,0,0,0:2,0:0,15,59:15 0/0:0,12,98:4:0:0:4,0,0,0:4,0:0,21,122:21   0/0:0,6,70:2:0:0:2,0,0,0:2,0:0,15,94:15 0/0:0,6,66:2:0:0:2,0,0,0:2,0:0,15,90:15 0/0:0,12,122:4:0:0:4,0,0,0:4,0:0,21,146:21  0/0:0,3,29:1:0:0:0,1,0,0:1,0:0,12,53:12 0/0:0,6,72:2:0:0:2,0,0,0:2,0:0,15,96:15 0/0:0,9,76:3:0:0:3,0,0,0:3,0:0,18,100:18    0/0:0,15,136:5:0:0:5,0,0,0:5,0:0,24,160:24  0/0:0,30,182:10:0:0:10,0,0,0:10,0:0,39,206:39   0/0:0,6,66:2:0:0:2,0,0,0:2,0:0,15,90:15 0/0:0,6,69:2:0:0:2,0,0,0:2,0:0,15,93:15 0/0:0,27,152:9:0:0:9,0,0,0:9,0:0,36,176:36  ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,94:2:0:0:2,0,0,0:2,0:0,15,118:15    0/0:0,21,195:7:0:0:5,2,0,0:7,0:0,30,219:30  0/0:0,9,92:3:0:0:2,1,0,0:3,0:0,18,116:18    0/1:33,0,18:2:1:0:0,1,1,0:1,1:23,0,33:23    0/0:0,3,35:1:0:0:1,0,0,0:1,0:0,12,59:12 0/0:0,9,91:3:0:0:3,0,0,0:3,0:0,18,115:18    
0/0:0,3,36:1:0:0:1,0,0,0:1,0:0,12,60:12 0/0:0,30,212:10:0:0:9,1,0,0:10,0:0,39,236:39    0/0:0,9,89:3:0:0:3,0,0,0:3,0:0,18,113:18    ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,21,195:7:0:0:7,0,0,0:7,0:0,30,219:30  ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,9,97:3:0:0:2,1,0,0:3,0:0,18,121:18    0/0:0,9,93:3:0:0:3,0,0,0:3,0:0,18,117:18    0/0:0,9,116:3:0:0:3,0,0,0:3,0:0,18,140:18   0/0:0,6,71:2:0:0:1,1,0,0:2,0:0,15,95:15 0/0:0,9,89:3:0:0:3,0,0,0:3,0:0,18,113:18    0/0:0,33,175:11:0:0:11,0,0,0:11,0:0,42,199:42   0/0:0,6,63:2:0:0:2,0,0,0:2,0:0,15,87:15 0/0:0,21,145:7:0:0:7,0,0,0:7,0:0,30,169:30  0/0:0,3,39:1:0:0:0,1,0,0:1,0:0,12,63:12 0/0:0,9,84:3:0:0:3,0,0,0:3,0:0,18,108:18    0/0:0,3,13:1:0:0:1,0,0,0:1,0:0,12,37:12 0/0:0,3,23:1:0:0:1,0,0,0:1,0:0,12,47:12 0/0:0,12,106:4:0:0:4,0,0,0:4,0:0,21,130:21  0/0:0,3,36:1:0:0:1,0,0,0:1,0:0,12,60:12 0/0:0,9,94:3:0:0:3,0,0,0:3,0:0,18,118:18    0/0:0,6,67:2:0:0:2,0,0,0:2,0:0,15,91:15 0/0:2,5,27:2:1:0:1,0,0,1:1,0:0,12,49:12
+KB704451    4157877 .   G   A   999.0   .   DP=250;VDB=6.58963e-09;SGB=31.659;RPB=0.0227135;MQB=0.410318;MQSB=0.139343;BQB=0.0767891;MQ0F=0;DPR=188,48;ICB=0.990841;HOB=0.00761276;AC=17;AN=118;DP4=176,12,45,3;MQ=55;SNP;VARTYPE=SNP   GT:PL:DP:DV:SP:DP4:DPR:GP:GQ    0/1:159,0,202:22:9:0:12,1,8,1:13,9:154,0,212:127    0/0:0,12,120:4:0:0:4,0,0,0:4,0:0,16,134:16  0/0:0,51,207:17:0:0:17,0,0,0:17,0:0,55,221:55   0/0:0,9,98:3:0:0:3,0,0,0:3,0:0,13,112:13    0/1:123,0,61:8:5:0:3,0,4,1:3,5:118,0,71:71  0/0:0,3,38:1:0:0:0,1,0,0:1,0:0,7,52:7   0/1:68,0,29:3:2:0:1,0,1,1:1,2:63,0,39:39    0/0:0,6,69:2:0:0:2,0,0,0:2,0:0,10,83:10 0/0:0,12,119:4:0:0:4,0,0,0:4,0:0,16,133:16  0/1:34,0,34:2:1:0:1,0,1,0:1,1:29,0,44:29    0/1:24,0,99:5:1:0:4,0,1,0:4,1:19,0,109:19   0/1:34,0,28:2:1:0:1,0,1,0:1,1:29,0,38:29    0/0:0,6,58:2:0:0:2,0,0,0:2,0:0,10,72:10 0/1:122,0,57:7:4:0:3,0,4,0:3,4:117,0,67:67  0/0:0,3,41:1:0:0:1,0,0,0:1,0:0,7,55:7   0/0:0,3,29:1:0:0:1,0,0,0:1,0:0,7,43:7   0/0:0,12,105:4:0:0:4,0,0,0:4,0:0,16,119:16  0/0:0,18,144:6:0:0:6,0,0,0:6,0:0,22,158:22  0/1:118,0,63:8:5:0:3,0,5,0:3,5:113,0,73:73  0/0:0,6,34:2:0:0:2,0,0,0:2,0:0,10,48:10 0/0:0,15,131:5:0:0:5,0,0,0:5,0:0,19,145:19  0/0:0,6,72:2:0:0:2,0,0,0:2,0:0,10,86:10 0/0:0,6,89:2:0:0:2,0,0,0:2,0:0,10,103:10    1/1:124,12,0:4:4:0:0,0,4,0:0,4:112,4,2:4    0/0:0,3,34:1:0:0:0,1,0,0:1,0:0,7,48:7   0/0:0,6,73:2:0:0:2,0,0,0:2,0:0,10,87:10 0/0:0,9,91:3:0:0:3,0,0,0:3,0:0,13,105:13    0/0:0,15,138:5:0:0:5,0,0,0:5,0:0,19,152:19  0/0:0,30,179:10:0:0:10,0,0,0:10,0:0,34,193:34   0/0:0,6,65:2:0:0:2,0,0,0:2,0:0,10,79:10 0/0:0,6,70:2:0:0:2,0,0,0:2,0:0,10,84:10 0/0:0,27,155:9:0:0:9,0,0,0:9,0:0,31,169:31  ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,94:2:0:0:2,0,0,0:2,0:0,10,108:10    0/0:0,15,161:5:0:0:3,2,0,0:5,0:0,19,175:19  0/0:0,6,72:2:0:0:1,1,0,0:2,0:0,10,86:10 0/0:0,6,65:2:0:0:1,1,0,0:2,0:0,10,79:10 0/1:36,3,0:1:1:0:0,0,1,0:0,1:29,0,7:7   0/0:0,9,93:3:0:0:3,0,0,0:3,0:0,13,107:13    
0/0:0,3,34:1:0:0:1,0,0,0:1,0:0,7,48:7   0/1:87,0,137:10:4:0:5,1,4,0:6,4:82,0,147:82 0/1:57,0,26:3:2:0:1,0,2,0:1,2:52,0,36:35    ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/1:139,0,73:7:4:0:3,0,4,0:3,4:134,0,83:83  ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,75:2:0:0:1,1,0,0:2,0:0,10,89:10 0/1:90,9,0:3:3:0:0,0,3,0:0,3:79,2,3:3   0/0:0,9,98:3:0:0:3,0,0,0:3,0:0,13,112:13    0/0:0,6,72:2:0:0:1,1,0,0:2,0:0,10,86:10 0/0:0,9,88:3:0:0:3,0,0,0:3,0:0,13,102:13    0/0:0,33,173:11:0:0:11,0,0,0:11,0:0,37,187:37   0/0:0,6,57:2:0:0:2,0,0,0:2,0:0,10,71:10 0/0:0,15,125:5:0:0:5,0,0,0:5,0:0,19,139:19  0/0:0,6,61:2:0:0:1,1,0,0:2,0:0,10,75:10 0/1:24,0,51:3:1:0:2,0,1,0:2,1:19,0,61:19    0/0:0,3,30:1:0:0:1,0,0,0:1,0:0,7,44:7   0/0:0,3,23:1:0:0:1,0,0,0:1,0:0,7,37:7   0/0:0,12,105:4:0:0:4,0,0,0:4,0:0,16,119:16  0/0:0,3,35:1:0:0:1,0,0,0:1,0:0,7,49:7   0/1:25,0,61:3:1:0:2,0,1,0:2,1:20,0,71:20    0/0:0,6,67:2:0:0:2,0,0,0:2,0:0,10,81:10 0/0:0,3,8:1:0:0:0,1,0,0:1,0:0,7,22:7
+KB704451    4157907 .   A   C   278.0   .   DP=295;VDB=0.241276;SGB=26.7514;RPB=0.676983;MQB=0.997838;MQSB=0.136536;BQB=0.45683;MQ0F=0;DPR=264,15;ICB=0.00518819;HOB=0.00237812;AC=4;AN=116;DP4=233,31,14,1;MQ=59;SNP;VARTYPE=SNP   GT:PL:DP:DV:SP:DP4:DPR:GP:GQ    0/0:0,90,255:30:0:0:25,5,0,0:30,0:0,101,283:101 0/0:0,30,201:10:0:0:9,1,0,0:10,0:0,41,229:41    0/1:157,0,188:18:8:0:10,0,7,1:10,8:145,0,205:127    0/1:75,0,90:5:2:0:2,1,2,0:3,2:63,0,107:63   0/0:0,30,201:10:0:0:9,1,0,0:10,0:0,41,229:41    0/0:0,6,80:2:0:0:1,1,0,0:2,0:0,17,108:17    0/0:0,12,134:4:0:0:3,1,0,0:4,0:0,23,162:23  0/0:0,3,33:1:0:0:1,0,0,0:1,0:0,14,61:14 0/0:0,21,160:7:0:0:7,0,0,0:7,0:0,32,188:32  0/0:0,12,135:4:0:0:2,2,0,0:4,0:0,23,163:23  0/0:0,15,148:5:0:0:5,0,0,0:5,0:0,26,176:26  0/0:0,9,82:3:0:0:3,0,0,0:3,0:0,20,110:20    0/1:70,0,19:4:3:0:1,0,3,0:1,3:58,0,36:36    0/0:0,24,246:8:0:0:7,1,0,0:8,0:0,35,274:35  0/0:0,18,147:6:0:0:6,0,0,0:6,0:0,29,175:29  ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,9,82:3:0:0:3,0,0,0:3,0:0,20,110:20    0/1:59,0,62:5:2:0:3,0,2,0:3,2:47,0,79:47    0/0:0,33,192:11:0:0:11,0,0,0:11,0:0,44,220:44   0/0:0,9,94:3:0:0:3,0,0,0:3,0:0,20,122:20    0/0:0,24,198:8:0:0:7,1,0,0:8,0:0,35,226:35  0/0:0,12,120:4:0:0:4,0,0,0:4,0:0,23,148:23  0/0:0,15,165:5:0:0:3,2,0,0:5,0:0,26,193:26  0/0:0,24,172:8:0:0:8,0,0,0:8,0:0,35,200:35  0/0:0,3,31:1:0:0:0,1,0,0:1,0:0,14,59:14 0/0:0,6,64:2:0:0:2,0,0,0:2,0:0,17,92:17 0/0:0,6,66:2:0:0:2,0,0,0:2,0:0,17,94:17 0/0:0,15,118:5:0:0:5,0,0,0:5,0:0,26,146:26  0/0:0,33,178:11:0:0:11,0,0,0:11,0:0,44,206:44   0/0:0,3,35:1:0:0:1,0,0,0:1,0:0,14,63:14 0/0:0,9,74:3:0:0:2,1,0,0:3,0:0,20,102:20    0/0:0,21,168:7:0:0:5,2,0,0:7,0:0,32,196:32  0/0:0,3,40:1:0:0:1,0,0,0:1,0:0,14,68:14 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,89:2:0:0:2,0,0,0:2,0:0,17,117:17    0/0:0,12,130:4:0:0:2,2,0,0:4,0:0,23,158:23  0/0:0,9,78:3:0:0:1,2,0,0:3,0:0,20,106:20    0/0:0,6,65:2:0:0:1,1,0,0:2,0:0,17,93:17 0/0:0,3,35:1:0:0:1,0,0,0:1,0:0,14,63:14 0/0:0,6,55:2:0:0:2,0,0,0:2,0:0,17,83:17 
0/0:0,3,29:1:0:0:1,0,0,0:1,0:0,14,57:14 0/0:0,36,194:12:0:0:11,1,0,0:12,0:0,47,222:47   0/0:0,12,110:4:0:0:4,0,0,0:4,0:0,23,138:23  ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,18,182:6:0:0:6,0,0,0:6,0:0,29,210:29  ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,3,34:1:0:0:0,1,0,0:1,0:0,14,62:14 0/0:0,6,68:2:0:0:2,0,0,0:2,0:0,17,96:17 0/0:0,9,107:3:0:0:3,0,0,0:3,0:0,20,135:20   0/0:0,6,67:2:0:0:1,1,0,0:2,0:0,17,95:17 0/0:0,6,68:2:0:0:2,0,0,0:2,0:0,17,96:17 0/0:0,27,184:9:0:0:9,0,0,0:9,0:0,38,212:38  0/0:0,9,85:3:0:0:3,0,0,0:3,0:0,20,113:20    0/0:0,12,111:4:0:0:4,0,0,0:4,0:0,23,139:23  0/0:0,6,77:2:0:0:1,1,0,0:2,0:0,17,105:17    0/0:0,12,108:4:0:0:3,1,0,0:4,0:0,23,136:23  0/0:0,3,27:1:0:0:1,0,0,0:1,0:0,14,55:14 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,64:2:0:0:2,0,0,0:2,0:0,17,92:17 0/0:0,3,33:1:0:0:1,0,0,0:1,0:0,14,61:14 0/0:0,12,125:4:0:0:4,0,0,0:4,0:0,23,153:23  0/0:0,9,98:3:0:0:2,1,0,0:3,0:0,20,126:20    0/0:0,6,46:2:0:0:2,0,0,0:2,0:0,17,74:17
+KB704451    4157909 .   T   G   278.0   .   DP=295;VDB=0.184881;SGB=22.7413;RPB=0.646301;MQB=0.998034;MQSB=0.200514;BQB=0.321842;MQ0F=0;DPR=247,15;ICB=0.00558284;HOB=0.00255102;AC=4;AN=112;DP4=218,29,15,1;MQ=59;SNP;VARTYPE=SNP  GT:PL:DP:DV:SP:DP4:DPR:GP:GQ    0/0:0,87,255:29:0:0:24,5,0,0:29,0:0,97,282:97   0/0:0,27,183:9:0:0:9,0,0,0:9,0:0,37,210:37  0/1:156,0,167:19:8:0:11,0,7,1:11,8:145,0,183:127    0/1:75,0,107:5:2:0:2,1,2,0:3,2:64,0,123:64  0/0:0,27,191:9:0:0:8,1,0,0:9,0:0,37,218:37  0/0:0,6,80:2:0:0:1,1,0,0:2,0:0,16,107:16    0/0:0,12,119:4:0:0:3,1,0,0:4,0:0,22,146:22  0/0:0,3,34:1:0:0:1,0,0,0:1,0:0,13,61:13 0/0:0,15,126:5:0:0:5,0,0,0:5,0:0,25,153:25  0/0:0,12,132:4:0:0:2,2,0,0:4,0:0,22,159:22  0/0:0,12,133:4:0:0:4,0,0,0:4,0:0,22,160:22  0/0:0,6,67:2:0:0:2,0,0,0:2,0:0,16,94:16 0/1:79,9,0:3:3:0:0,0,3,0:0,3:60,0,8:8   0/0:0,21,230:7:0:0:6,1,0,0:7,0:0,31,257:31  0/0:0,18,144:6:0:0:6,0,0,0:6,0:0,28,171:28  ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,53:2:0:0:2,0,0,0:2,0:0,16,80:16 0/1:59,0,64:5:2:0:3,0,2,0:3,2:48,0,80:48    0/0:0,33,180:11:0:0:11,0,0,0:11,0:0,43,207:43   0/0:0,12,110:4:0:0:4,0,0,0:4,0:0,22,137:22  0/0:0,24,190:8:0:0:7,1,0,0:8,0:0,34,217:34  0/0:0,12,110:4:0:0:4,0,0,0:4,0:0,22,137:22  0/0:0,15,164:5:0:0:3,2,0,0:5,0:0,25,191:25  0/0:0,24,161:8:0:0:8,0,0,0:8,0:0,34,188:34  0/0:0,3,32:1:0:0:0,1,0,0:1,0:0,13,59:13 0/0:0,6,63:2:0:0:2,0,0,0:2,0:0,16,90:16 0/0:0,6,65:2:0:0:2,0,0,0:2,0:0,16,92:16 0/0:0,15,121:5:0:0:5,0,0,0:5,0:0,25,148:25  0/0:0,30,174:10:0:0:10,0,0,0:10,0:0,40,201:40   0/0:0,3,34:1:0:0:1,0,0,0:1,0:0,13,61:13 0/0:0,6,63:2:0:0:2,0,0,0:2,0:0,16,90:16 0/0:0,21,164:7:0:0:5,2,0,0:7,0:0,31,191:31  0/0:0,3,37:1:0:0:1,0,0,0:1,0:0,13,64:13 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,89:2:0:0:2,0,0,0:2,0:0,16,116:16    0/0:0,12,128:4:0:0:2,2,0,0:4,0:0,22,155:22  0/0:0,9,94:3:0:0:1,2,0,0:3,0:0,19,121:19    0/0:0,6,63:2:0:0:1,1,0,0:2,0:0,16,90:16 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,56:2:0:0:2,0,0,0:2,0:0,16,83:16 
0/0:0,3,34:1:0:0:1,0,0,0:1,0:0,13,61:13 0/0:0,36,193:12:0:0:11,1,0,0:12,0:0,46,220:46   0/0:0,12,108:4:0:0:4,0,0,0:4,0:0,22,135:22  ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,18,168:6:0:0:6,0,0,0:6,0:0,28,195:28  ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,3,31:1:0:0:0,1,0,0:1,0:0,13,58:13 0/0:0,6,47:2:0:0:2,0,0,0:2,0:0,16,74:16 0/0:8,11,65:2:1:0:1,0,1,0:1,0:0,13,84:13    0/0:0,6,64:2:0:0:1,1,0,0:2,0:0,16,91:16 0/0:0,3,34:1:0:0:1,0,0,0:1,0:0,13,61:13 0/0:0,27,177:9:0:0:9,0,0,0:9,0:0,37,204:37  0/0:0,6,50:2:0:0:2,0,0,0:2,0:0,16,77:16 0/0:0,12,101:4:0:0:4,0,0,0:4,0:0,22,128:22  0/0:0,6,65:2:0:0:1,1,0,0:2,0:0,16,92:16 0/0:0,12,100:4:0:0:3,1,0,0:4,0:0,22,127:22  0/0:0,3,31:1:0:0:1,0,0,0:1,0:0,13,58:13 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,9,84:3:0:0:3,0,0,0:3,0:0,19,111:19    0/0:0,3,32:1:0:0:1,0,0,0:1,0:0,13,59:13 0/0:0,12,104:4:0:0:4,0,0,0:4,0:0,22,131:22  0/0:0,6,66:2:0:0:1,1,0,0:2,0:0,16,93:16 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0
+KB704451    4157927 .   G   A   4.88727 .   DP=334;VDB=0.38;SGB=3.29913;RPB=0.454248;MQB=0.970588;MQSB=0.546099;BQB=0.215686;MQ0F=0;DPR=306,2;ICB=0.000310486;HOB=0.000153894;AC=1;AN=114;DP4=265,41,2,0;MQ=59;SNP;VARTYPE=SNP  GT:PL:DP:DV:SP:DP4:DPR:GP:GQ    0/0:0,105,255:35:0:0:31,4,0,0:35,0:0,122,295:122    0/0:0,45,255:15:0:0:14,1,0,0:15,0:0,62,295:62   0/0:0,60,255:20:0:0:17,3,0,0:20,0:0,77,295:77   0/0:0,21,242:7:0:0:6,1,0,0:7,0:0,38,282:38  0/0:0,24,207:8:0:0:6,2,0,0:8,0:0,41,247:41  0/0:0,6,71:2:0:0:2,0,0,0:2,0:0,23,112:23    0/0:0,24,215:8:0:0:6,2,0,0:8,0:0,41,255:41  0/0:0,6,70:2:0:0:1,1,0,0:2,0:0,23,111:23    0/0:0,30,191:10:0:0:10,0,0,0:10,0:0,47,231:47   0/0:0,15,163:5:0:0:3,2,0,0:5,0:0,32,203:32  0/0:0,15,151:5:0:0:5,0,0,0:5,0:0,32,191:32  0/0:0,6,70:2:0:0:2,0,0,0:2,0:0,23,111:23    0/0:0,12,102:4:0:0:4,0,0,0:4,0:0,29,142:29  0/0:0,24,255:8:0:0:6,2,0,0:8,0:0,41,295:41  0/0:0,21,189:7:0:0:7,0,0,0:7,0:0,38,229:38  0/0:0,3,35:1:0:0:0,1,0,0:1,0:0,20,76:20 0/0:0,3,40:1:0:0:1,0,0,0:1,0:0,20,81:20 0/0:0,12,126:4:0:0:3,1,0,0:4,0:0,29,166:29  0/0:0,39,255:13:0:0:12,1,0,0:13,0:0,56,295:56   0/0:0,21,206:7:0:0:6,1,0,0:7,0:0,38,246:38  0/0:0,30,238:10:0:0:8,2,0,0:10,0:0,47,278:47    0/0:0,18,145:6:0:0:6,0,0,0:6,0:0,35,185:35  0/0:0,24,244:8:0:0:6,2,0,0:8,0:0,41,284:41  0/0:0,24,195:8:0:0:7,1,0,0:8,0:0,41,235:41  0/0:0,3,27:1:0:0:0,1,0,0:1,0:0,20,68:20 0/0:0,6,62:2:0:0:2,0,0,0:2,0:0,23,103:23    0/0:0,6,64:2:0:0:2,0,0,0:2,0:0,23,105:23    0/0:0,15,123:5:0:0:5,0,0,0:5,0:0,32,163:32  0/0:0,33,184:11:0:0:11,0,0,0:11,0:0,50,224:50   0/0:0,3,35:1:0:0:1,0,0,0:1,0:0,20,76:20 0/0:0,3,35:1:0:0:1,0,0,0:1,0:0,20,76:20 0/0:0,18,165:6:0:0:4,2,0,0:6,0:0,35,205:35  0/0:0,3,38:1:0:0:1,0,0,0:1,0:0,20,79:20 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,9,117:3:0:0:3,0,0,0:3,0:0,26,157:26   0/0:0,12,121:4:0:0:2,2,0,0:4,0:0,29,161:29  0/0:0,9,95:3:0:0:1,2,0,0:3,0:0,26,135:26    0/0:0,3,41:1:0:0:1,0,0,0:1,0:0,20,82:20 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 
0/0:0,6,60:2:0:0:2,0,0,0:2,0:0,23,101:23    0/0:0,3,25:1:0:0:1,0,0,0:1,0:0,20,66:20 0/0:0,36,213:12:0:0:11,1,0,0:12,0:0,53,253:53   0/0:0,15,152:5:0:0:4,1,0,0:5,0:0,32,192:32  ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,21,215:7:0:0:6,1,0,0:7,0:0,38,255:38  ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,3,31:1:0:0:0,1,0,0:1,0:0,20,72:20 0/0:0,6,60:2:0:0:2,0,0,0:2,0:0,23,101:23    0/0:0,9,101:3:0:0:3,0,0,0:3,0:0,26,141:26   ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,60:2:0:0:2,0,0,0:2,0:0,23,101:23    0/0:0,27,179:9:0:0:9,0,0,0:9,0:0,44,219:44  0/0:0,9,92:3:0:0:3,0,0,0:3,0:0,26,132:26    0/0:0,12,112:4:0:0:4,0,0,0:4,0:0,29,152:29  0/0:0,6,58:2:0:0:1,1,0,0:2,0:0,23,99:23 0/0:0,15,123:5:0:0:4,1,0,0:5,0:0,32,163:32  0/0:0,3,25:1:0:0:1,0,0,0:1,0:0,20,66:20 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,9,78:3:0:0:3,0,0,0:3,0:0,26,118:26    0/0:0,3,34:1:0:0:1,0,0,0:1,0:0,20,75:20 0/1:47,0,51:3:2:0:1,0,2,0:1,2:29,0,74:29    0/0:0,9,80:3:0:0:2,1,0,0:3,0:0,26,120:26    0/0:0,6,53:2:0:0:2,0,0,0:2,0:0,23,94:23
+KB704451    4157938 .   ATTT    ATTTT   650.0   .   INDEL;IDV=18;IMF=0.428571;DP=361;VDB=0.773794;SGB=32.6744;MQSB=0.993251;MQ0F=0.00831025;DPR=115,60;ICB=0.929833;HOB=0.0258;AC=23;AN=100;DP4=98,17,48,12;MQ=59;INS;VARTYPE=INS   GT:PL:DP:DV:SP:DP4:DPR:GP:GQ    0/1:124,0,7:19:16:0:3,0,13,3:3,16:123,0,13:13   0/1:14,3,0:1:1:0:0,0,1,0:0,1:12,1,5:4   0/0:0,9,55:3:0:0:3,0,0,0:3,0:0,9,62:9   0/0:0,21,146:7:0:0:5,2,0,0:7,0:0,21,153:21  0/0:0,18,126:6:0:0:4,2,0,0:6,0:0,18,133:18  0/0:0,6,56:2:0:0:2,0,0,0:2,0:0,7,63:7   0/0:0,9,87:3:0:0:2,1,0,0:3,0:0,9,94:9   1/1:46,9,0:3:3:0:0,0,1,2:0,3:40,4,1:4   0/0:0,27,155:9:0:0:8,1,0,0:9,0:0,27,162:27  ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,57:2:0:0:2,0,0,0:2,0:0,7,64:7   0/1:24,3,0:1:1:0:0,0,1,0:0,1:22,1,5:5   0/0:0,3,32:1:0:0:1,0,0,0:1,0:1,5,40:5   0/0:0,9,84:3:0:0:2,1,0,0:3,0:0,9,91:9   0/0:0,15,111:5:0:0:5,0,0,0:5,0:0,15,118:15  ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/1:32,6,0:2:2:0:0,0,1,1:0,2:28,2,3:3   0/0:0,6,55:2:0:0:2,0,0,0:2,0:0,7,62:7   0/1:63,0,61:7:4:0:3,0,3,1:3,4:62,0,67:61    0/0:0,24,154:8:0:0:7,1,0,0:8,0:0,24,161:24  0/1:16,0,94:7:2:0:5,0,2,0:5,2:15,0,100:15   0/1:61,0,101:9:4:0:4,1,3,1:5,4:60,0,107:60  0/0:0,3,31:1:0:0:1,0,0,0:1,0:1,5,39:5   0/1:13,3,0:1:1:0:0,0,0,1:0,1:11,1,5:4   0/1:16,0,47:3:1:0:2,0,1,0:2,1:15,0,53:15    0/0:0,3,32:1:0:0:1,0,0,0:1,0:1,5,40:5   0/1:46,0,57:5:2:0:3,0,2,0:3,2:45,0,63:45    0/1:50,0,12:6:5:0:1,0,5,0:1,5:49,0,18:18    0/0:0,3,30:1:0:0:1,0,0,0:1,0:1,5,38:5   0/0:0,3,4:1:0:0:1,0,0,0:1,0:1,5,12:4    0/1:18,0,98:5:1:0:2,2,1,0:4,1:17,0,104:17   ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/1:15,0,26:2:1:0:1,0,1,0:1,1:14,0,32:14    0/1:20,0,26:2:1:0:1,0,0,1:1,1:19,0,32:19    0/0:0,6,60:2:0:0:1,1,0,0:2,0:0,7,67:7   0/0:0,3,32:1:0:0:1,0,0,0:1,0:1,5,40:5   ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/1:33,3,0:1:1:0:0,0,1,0:0,1:31,1,5:5   0/1:82,6,0:7:6:0:1,0,5,1:1,6:78,2,3:3   
0/0:0,15,126:5:0:0:4,1,0,0:5,0:0,15,133:15  ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/1:34,6,0:2:2:0:0,0,2,0:0,2:30,2,3:3   ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,50:2:0:0:1,1,0,0:2,0:0,7,57:7   0/0:0,3,28:1:0:0:1,0,0,0:1,0:1,5,36:5   0/0:0,6,57:2:0:0:2,0,0,0:2,0:0,7,64:7   0/0:0,18,124:6:0:0:5,1,0,0:6,0:0,18,131:18  0/1:53,6,0:2:2:0:0,0,2,0:0,2:49,2,3:3   0/0:0,12,96:4:0:0:4,0,0,0:4,0:0,12,103:12   0/1:25,3,0:1:1:0:0,0,1,0:0,1:23,1,5:5   1/1:61,9,0:3:3:0:0,0,2,1:0,3:55,4,1:4   ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,3,31:1:0:0:0,1,0,0:1,0:1,5,39:5   0/0:0,6,56:2:0:0:2,0,0,0:2,0:0,7,63:7   0/0:0,3,29:1:0:0:1,0,0,0:1,0:1,5,37:5   0/0:0,3,32:1:0:0:1,0,0,0:1,0:1,5,40:5   0/0:0,9,87:3:0:0:2,1,0,0:3,0:0,9,94:9   ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0
+KB704451    4157940 .   TTGTGTGTGTGTGT  TTGTGTGTGTGTGTGTGT,TTTCTGTGTGTGTGTGT    999.0   .   INDEL;IDV=7;IMF=0.5;DP=366;VDB=0.0431342;SGB=14.7456;MQSB=0.996953;MQ0F=0.010929;DPR=86,41,8;ICB=0.963728;HOB=0.02;AC=21,6;AN=90;DP4=70,16,39,10;MQ=58;INS;VARTYPE=INS,INS  GT:PL:DP:DV:SP:DP4:DPR:GP:GQ    0/2:60,60,60,3,3,0:1:1:0:0,0,1,0:0,0,1:54,55,61,2,5,9:4 1/1:255,18,0,255,18,255:6:6:0:0,0,3,3:0,6,0:248,11,0,252,19,263:11  0/0:0,6,62,6,62,62:2:0:0:2,0,0,0:2,0,0:1,7,70,11,71,78:6    0/1:9,0,238,27,241,255:7:1:0:4,2,1,0:6,1,0:8,0,245,31,249,270:8 0/0:0,12,185,12,185,185:4:0:0:3,1,0,0:4,0,0:0,12,192,16,193,200:11  0/0:0,6,110,6,110,110:2:0:0:2,0,0,0:2,0,0:1,7,118,11,119,126:6  0/1:1,0,158,10,161,165:4:1:0:2,1,1,0:3,1,0:3,2,167,16,171,182:3 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   0/0:0,24,255,24,255,255:8:0:0:7,1,0,0:8,0,0:0,24,262,28,263,270:23  ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   0/1:37,0,54,40,57,94:2:1:0:1,0,1,0:1,1,0:36,0,60,44,64,108:35   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   0/0:0,3,60,3,60,60:1:0:0:1,0,0,0:1,0,0:2,5,69,9,70,77:4 0/1:117,6,0,117,6,117:2:2:0:0,0,1,1:0,2,0:113,3,3,118,10,128:2  0/0:0,18,237,18,237,237:6:0:0:5,1,0,0:6,0,0:0,18,244,22,245,252:17  ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   0/2:120,120,120,6,6,0:2:2:0:0,0,1,1:0,0,2:111,112,119,2,6,7:3   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   1/1:255,21,0,255,21,255:7:7:0:0,0,5,2:0,7,0:247,14,0,252,22,263:13  0/2:19,31,181,0,157,154:5:1:0:3,1,1,0:4,0,1:14,27,183,0,160,164:14  0/1:120,0,255,141,255,255:10:3:0:6,1,2,1:7,3,0:119,0,261,145,262,269:119    0/0:0,15,212,15,212,212:5:0:0:5,0,0,0:5,0,0:0,15,219,19,220,227:14  0/0:0,15,243,15,243,243:5:0:0:4,1,0,0:5,0,0:0,15,250,19,251,258:14  1/1:40,9,0,40,9,40:3:3:0:0,0,3,0:0,3,0:34,4,2,39,12,50:3    ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   0/0:0,6,110,6,110,110:2:0:0:2,0,0,0:2,0,0:1,7,118,11,119,126:6  0/1:54,0,54,57,57,111:2:1:0:1,0,1,0:1,1,0:53,0,60,61,64,125:51  
0/0:0,9,139,9,139,139:3:0:0:3,0,0,0:3,0,0:0,10,146,14,147,154:8 1/1:243,15,0,243,15,243:5:5:0:0,0,4,1:0,5,0:236,9,0,241,16,251:8    ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   0/1:55,1,0,58,3,57:2:1:0:1,0,1,0:1,1,0:54,1,7,62,11,72:5    0/0:0,9,170,9,170,170:3:0:0:1,2,0,0:3,0,0:0,10,177,14,178,185:8 0/1:60,3,0,60,3,60:1:1:0:0,0,1,0:0,1,0:58,2,5,63,9,73:4 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   0/0:0,3,60,3,60,60:1:0:0:1,0,0,0:1,0,0:2,5,69,9,70,77:4 0/0:0,3,60,3,60,60:1:0:0:1,0,0,0:1,0,0:2,5,69,9,70,77:4 0/1:60,3,0,60,3,60:1:1:0:0,0,0,1:0,1,0:58,2,5,63,9,73:4 0/0:0,3,60,3,60,60:1:0:0:1,0,0,0:1,0,0:2,5,69,9,70,77:4 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   0/1:60,3,0,60,3,60:1:1:0:0,0,1,0:0,1,0:58,2,5,63,9,73:4 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   0/0:0,15,206,15,206,206:5:0:0:4,1,0,0:5,0,0:0,15,213,19,214,221:14  ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   1/2:134,105,96,40,0,34:5:5:0:0,0,5,0:0,3,2:125,97,95,36,0,41:35 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   0/1:58,6,0,58,6,58:2:2:0:0,0,2,0:0,2,0:54,3,3,59,10,69:2    0/0:0,6,102,6,102,102:2:0:0:1,1,0,0:2,0,0:1,7,110,11,111,118:6  0/0:0,3,60,3,60,60:1:0:0:1,0,0,0:1,0,0:2,5,69,9,70,77:4 0/0:0,3,60,3,60,60:1:0:0:1,0,0,0:1,0,0:2,5,69,9,70,77:4 0/1:98,0,88,104,94,191:4:2:0:1,1,2,0:2,2,0:97,0,94,108,101,205:92   0/2:35,35,35,3,3,0:1:1:0:0,0,1,0:0,0,1:29,30,36,2,5,9:4 0/0:0,6,110,6,110,110:2:0:0:2,0,0,0:2,0,0:1,7,118,11,119,126:6  ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   0/2:45,45,45,3,3,0:1:1:0:0,0,1,0:0,0,1:39,40,46,2,5,9:4 0/0:0,3,60,3,60,60:1:0:0:0,1,0,0:1,0,0:2,5,69,9,70,77:4 0/0:0,6,110,6,110,110:2:0:0:2,0,0,0:2,0,0:1,7,118,11,119,126:6  ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0   
0/0:0,3,60,3,60,60:1:0:0:1,0,0,0:1,0,0:2,5,69,9,70,77:4 0/0:0,6,120,6,120,120:2:0:0:1,1,0,0:2,0,0:1,7,128,11,129,136:6  ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0
diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index 3782d12..729d6ee 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -457,6 +457,27 @@ def test_samples(self):
         self.assertEqual(self.reader.samples, self.samples)
 
 
+class TestMetadataWhitespace(unittest.TestCase):
+    filename = 'metadata-whitespace.vcf'
+    def test_metadata_whitespace(self):
+        """
+        Test parsing metadata header lines with whitespace after the commas.
+        """
+        self.reader = vcf.Reader(fh(self.filename))
+
+        # The INDEL INFO line uses ", " separators; check every parsed field.
+        info_indel = self.reader.infos['INDEL']
+        self.assertEqual(info_indel.id, 'INDEL')
+        self.assertEqual(info_indel.num, 0)
+        self.assertEqual(info_indel.type, 'Flag')
+        self.assertEqual(info_indel.desc, 'Indicates that the variant is an INDEL.')
+
+        # Walk every record and its items to check the body parses without raising.
+        for r in self.reader:
+            for c in r:
+                pass
+
+
 class TestMixedFiltering(unittest.TestCase):
     filename = 'mixed-filtering.vcf'
     def test_mixed_filtering(self):
@@ -1470,6 +1491,7 @@ def test_write_uncalled(self):
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestBcfToolsOutputWriter))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestWriterDictionaryMeta))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestSamplesSpace))
+suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestMetadataWhitespace))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestMixedFiltering))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestRecord))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestCall))

From ec193b12b9443dc40b0ea49cde134fe60915b60f Mon Sep 17 00:00:00 2001
From: cariaso <cariaso@gmail.com>
Date: Sun, 19 Apr 2015 17:53:49 -0400
Subject: [PATCH 123/168] Enable compression to be disabled for .gz filenames

https://github.com/jamescasbon/PyVCF/issues/198#issuecomment-94317496
---
 vcf/parser.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/vcf/parser.py b/vcf/parser.py
index 2124798..bf55128 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -233,7 +233,7 @@ def read_meta(self, meta_string):
 class Reader(object):
     """ Reader for a VCF v 4.0 file, an iterator returning ``_Record objects`` """
 
-    def __init__(self, fsock=None, filename=None, compressed=False, prepend_chr=False,
+    def __init__(self, fsock=None, filename=None, compressed=None, prepend_chr=False,
                  strict_whitespace=False):
         """ Create a new Reader for a VCF file.
 
@@ -256,9 +256,11 @@ def __init__(self, fsock=None, filename=None, compressed=False, prepend_chr=Fals
             self._reader = fsock
             if filename is None and hasattr(fsock, 'name'):
                 filename = fsock.name
-                compressed = compressed or filename.endswith('.gz')
+                if compressed is None:
+                    compressed = filename.endswith('.gz')
         elif filename:
-            compressed = compressed or filename.endswith('.gz')
+            if compressed is None:
+                compressed = filename.endswith('.gz')
             self._reader = open(filename, 'rb' if compressed else 'rt')
         self.filename = filename
         if compressed:

From df454bcebd3ee6b376f9331264cab5e64c43a453 Mon Sep 17 00:00:00 2001
From: Michele Mattioni <mattions@gmail.com>
Date: Fri, 24 Jul 2015 15:15:33 +0100
Subject: [PATCH 124/168] Bump the version to development mode

---
 vcf/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vcf/__init__.py b/vcf/__init__.py
index 149d25a..88842b6 100644
--- a/vcf/__init__.py
+++ b/vcf/__init__.py
@@ -190,4 +190,4 @@
 from vcf.parser import RESERVED_INFO, RESERVED_FORMAT
 from vcf.sample_filter import SampleFilter
 
-VERSION = '0.6.7'
+VERSION = '0.6.8.dev0'

From 6500a9ac099e8f97f2313b86cb1a118fbb254840 Mon Sep 17 00:00:00 2001
From: Harriet Dashnow <h.dashnow@gmail.com>
Date: Fri, 21 Aug 2015 16:17:07 +1000
Subject: [PATCH 125/168] Change self.data comment. It returns a namedtuple,
 not a dict

---
 vcf/model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vcf/model.py b/vcf/model.py
index c1d5710..f523c24 100644
--- a/vcf/model.py
+++ b/vcf/model.py
@@ -20,7 +20,7 @@ def __init__(self, site, sample, data):
         self.site = site
         #: The sample name
         self.sample = sample
-        #: Dictionary of data from the VCF file
+        #: Namedtuple of data from the VCF file
         self.data = data
 
         if hasattr(self.data, 'GT'):

From dc23dbe74fa71323885da0604a625cc71673708c Mon Sep 17 00:00:00 2001
From: Martijn Vermaat <martijn@vermaat.name>
Date: Tue, 15 Sep 2015 10:43:35 +0200
Subject: [PATCH 126/168] Configurable encoding reading compressed VCF files

The encoding used to read compressed VCF files was fixed to
ASCII, but is now configurable with the optional `encoding`
parameter to `vcf.Reader()`.

This is really a stopgap solution to read compressed VCF
files containing content that cannot be ASCII-decoded. More
changes are needed to properly handle encoding/decoding issues
on both Python 2 and 3.

Fixes #201
---
 vcf/parser.py                 |   8 +++++---
 vcf/test/issue-201.vcf.gz     | Bin 0 -> 2639 bytes
 vcf/test/issue-201.vcf.gz.tbi | Bin 0 -> 129 bytes
 vcf/test/test_vcf.py          |  20 ++++++++++++++++++++
 4 files changed, 25 insertions(+), 3 deletions(-)
 create mode 100644 vcf/test/issue-201.vcf.gz
 create mode 100644 vcf/test/issue-201.vcf.gz.tbi

diff --git a/vcf/parser.py b/vcf/parser.py
index bf55128..1c14694 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -234,7 +234,7 @@ class Reader(object):
     """ Reader for a VCF v 4.0 file, an iterator returning ``_Record objects`` """
 
     def __init__(self, fsock=None, filename=None, compressed=None, prepend_chr=False,
-                 strict_whitespace=False):
+                 strict_whitespace=False, encoding='ascii'):
         """ Create a new Reader for a VCF file.
 
             You must specify either fsock (stream) or filename.  Gzipped streams
@@ -266,7 +266,7 @@ def __init__(self, fsock=None, filename=None, compressed=None, prepend_chr=False
         if compressed:
             self._reader = gzip.GzipFile(fileobj=self._reader)
             if sys.version > '3':
-                self._reader = codecs.getreader('ascii')(self._reader)
+                self._reader = codecs.getreader(encoding)(self._reader)
 
         if strict_whitespace:
             self._separator = '\t'
@@ -295,6 +295,7 @@ def __init__(self, fsock=None, filename=None, compressed=None, prepend_chr=False
         self._prepend_chr = prepend_chr
         self._parse_metainfo()
         self._format_cache = {}
+        self.encoding = encoding
 
     def __iter__(self):
         return self
@@ -617,7 +618,8 @@ def fetch(self, chrom, start=None, end=None):
             raise Exception('Please provide a filename (or a "normal" fsock)')
 
         if not self._tabix:
-            self._tabix = pysam.Tabixfile(self.filename)
+            self._tabix = pysam.Tabixfile(self.filename,
+                                          encoding=self.encoding)
 
         if self._prepend_chr and chrom[:3] == 'chr':
             chrom = chrom[3:]
diff --git a/vcf/test/issue-201.vcf.gz b/vcf/test/issue-201.vcf.gz
new file mode 100644
index 0000000000000000000000000000000000000000..2af09fa65b70c3ecafbbb078b17aba0555cbc73c
GIT binary patch
literal 2639
zcmV-V3b6GbiwFb&00000{{{d;LjnLY3dNe;a@t51$FG^!$jwZGp<Df_P)d~sV@wra
z3&g3pQpq6O+7%Kb$(c~4yM3p9fZZ3{me2<R0Zxrf;{<G})#umeoIc(9qYodRY{z=?
z{IwZs58b=(xCMXt(7m5cMvduY-Wc3AX1%+HK3p{Z@l_u-?gqm}Z`K%$?<T)M)_^J@
zDo3p02Z#|408-ROztL@gMyoOL?PuFHo$iaTEnj@dI@ytVX!@ZBRZ1vPaA!8x@~g!k
z!ftVHu1_@3na^k9{h_PP#}A!{sRn)k0u&IxkoYZr#eYEnI^X&W4R%(K^YQfeSKC<)
zZZ(K_N1snSVkDh8y6w#W7<csXSnEc?*7e-&+ItO*@1}2?ZWs+G-y(f@dyjM_>W%09
zq;tR&1%fE_O*dHj_9l!^Eq3;iN+FLnz86}yYb?E$?LJ4i4PmrwQ3U#5y3wZ9cebJf
z@g8vk+lVVX66e~GC@c~u+63?@YSsb-8KoFi0D)>C2pDZ6H6y}=Cq$T&x`+seq|!yo
z2^TT7tuSH#y<CJu+A?woL{7$xao$E0gZJ|gQ#tt=AVqB>!qGbdL4w#JkjPxY0~M6>
zV3?$kg4z_Y<P&o>!mtgg5=Gzi*?2JS8;hAfo_A-1>B1OKjQK*JEsXBub~3IDmQ$4;
z!(su8XwC|im=&=$1|ULl&Z~mB4bfXktv_EacWD_<mKnbK)_G2E<2l~5y)+$T<*iNI
z{oLJ(gN_-j?d7Gp!Ao-^d0iI($p%nl12EYDL^gmb8vx4&pt1qr6#*)jAhomvNdUmI
z0g!9}E*pT#1`t~t$pl0;fFc`!RRSPm<J)V?__~<oySLHgc5pZ7&3<YNhnWJ18i}o7
zV2OXm+FIFWXswKuwKTUo|GTY^kn_wugp|ezubUwBEpu%=Tdwu@APB>NB{g_y1%WWW
z*GO2ufthUtuo~N5d4IXJS;v?n^=JhUKL1E5$ww_VtEcv=lWD$Htrv_k%uwW8p~xBG
z726Gc&y79uk&?nEVh>)sPY4HufGVF5oF}5Vcg0ms8H~G=(RA2b^y<gLNCct^i=jMJ
z<l0o?HVERAb104%B5H<5N&V4cDhAbwLby^SinyT{rWbd{qwU6?GXguTFddz2;)w1I
z2mwJb%3h*JG4eDPhV?`I{x@T+s46_^%|uUe59?vA1V3LB-vL3~Sd*iO!Zs6@5iiER
z6WkzHv^^!T<6F<Kj<>dMf7g^{XZa#JnOn}2rqC8D<U>pwegH9`Sf!jd^hW<D<U@pF
z`7pn_Bp-@%Z;}r&xF8?mBl+-Qc$-Lw{VPa_s{W-PZCX>`Qy>!W5vL%N5Y<c{L#aMi
z3(y4#@dyY4PBIBm&Gaag>QS{2UARF!b`j^9g!n!#x*#DQ0Wp<j65=~~=={d@7zpu9
zLVPcvGZG?FavqG66hg{QLKL5ggoxiNA=U(YT0+DmXN4qY#Wax+G1*Co<gF57Uy6j7
zM1HqATonMx22f-JFxdb^Hh?M{0LuoTvH{=~0nST^vH-Ab03;iL%Lbsb0SXCG76FkB
zpvVSbl>k?l5LK3$hfwU`1;RBY#LPAlcb&2A>q&_Ds4=`OA=V9cRzl2u!iDSVHhoq?
z%q^=3T@dSP$2ub+=Aok3rh1cvnBg&8f3(g@h()3x8k??LLhQ?t5KHw?iiB9y#CJeA
zD<KwR@ASvLC*S&011%8Chn($;iiN`WMp2PB{D3J0GeU8r-_R3HvELi_;%67NbOfIj
z1$W*kG?G|o%z7;Aj3m(+;R&6Q#%?gP#}q}*ozxjYqB9QXx1I5P)R~Mr(^0NE#%iM;
zt?ghJ$-S8LX8NL6qu_{P|BD`NTHS4+K)l%c5vNKjIL1$Q?bI}!s%dpGo!1<XnIPaS
z)f{7}oq&Z?0jn;k^EZ_hj#5D?JHDTz&MS|{OiWc%<?$W;bau;I!Gu97KE5Z@IsFmm
zNSGw)gm9`q;+_6T-l{*=$9qPCWH~ORF)rpi36kw3NcvU@QkNk??&7~&U9Jj%WCJL&
z0hnw6A{#)J4S;0>P}u<RiU4ONNNE6AHUN?hz-0qa*#Nl&DUE>022f-Juu6dIOOR@2
z9zw=qgzG-VsHYMn;<4>(KE;@h8dvICWeKuwunSKyras{u)b5iDPciD51W7K4eYInq
zeTp#+6}>jq8=qp_;W1i&v@Sfwm?g??^2iVj%hgMcV5q($!a2cEEymtiJ&xyhQ={LT
zO+S4yW^;|SIL?laUPCjqJ$GRSq2-GTZ>_Z3qe~0{@ULL$`IZJ+D#A*B{pLkn*cE=j
z^e=O3hy`Y8xuIBf_OHN<oiey<0*|_b0L1-$xBSq0^o4JWHX||Q`NQad^`SRkpfab*
zDalShdCux)WyN`)^aipMLvhnz?kPT!_K_W$+tZh=p{OyESXL*iRiHt&1rekL`PT8K
z$JlC%5tcPtt^y;7qq)*PmB9;WdN|JP6a}DkZa6ytKY>to3RjEZ`GJvY$51O|;C#@7
zyUaCB@sp3iv0ah5@p@^8+WepY`}cqU^Xyyh&Cv9NK%{lBbyvQ(wyK=Bv18r(bNnCf
zKKH%8^xOn{RYK;3awThKnj0@L?)1g|XrzhBFij@IP8I#!RUrPOnIA=rt5`WYiit?a
z-O*C7nmCd(3+IMb5Q=DtYz;ztsog*>LO@xWBz96YN7im;Ikx#|J9fAglXPYM&|p4j
z(ZjFsX6bqUZ(1hEXXkpNu&f`Qy?u$j(hhXs=7HR~_jjUQAEZku5da1FFs6P`kq1-H
z*=}AeS26A%^jS7zn{lq@&hV`#tzU7$?ckvtOHsJliH=Kj48n3+pQ|q_Q55x5MG||~
z!%4ugSU?W_9G8Zjhxco5)@qk=>ELgKN!r|dKT{_TOD8<&F_~bCruaFmkh<8GiV2m~
zNYo;v(j^rGWQ_CU5`DfhFi{2A{%oAAt!J~Sf$3r}o|V>*>1^`Q)5jHcA{}4J1Yt$a
z4O~(G#jYy7Khbn&WxZ@yR`Tw9A{?gJmM7<ag}*f`PdCRmqLX#}CDeYK_Deiv#X{>j
zrRH&)lU`0naDO<iG~f`l2x^u-iaHP{fA_Fzm%^@KHJ^(w&yXKZ{P>0-cXXVHuDQ0d
zZ{IF0waKcHr&Ut5s|%@^@`#=sf2k!VSUD&jc@P09>c)$&6-kp-k)`7X_Oq5}>sQx~
zch82oTxwGSDdx@H$4zcSY>PZMslg|TNLF=V;davrwY<uT7$kjYi+(<l;s*K=L{{pX
z`{h1%S~|d{_&Kpzsdg&^Tp>ktQ4-zaJI@3AqQvM!5mn><>qs9rTZajEB6KRi$D5>5
z0Z3ZqBhp=QTPv0hM|i=p9P4Ct&L-}e(js~$6je6Jb5LoC{pg9pzGou;qB>t>%HLDR
xMMl&Y#1Fr*{y!yZ-+6*Z001A02m}BC000301^_}s0stET0{{R300000002!9=1Bkm

literal 0
HcmV?d00001

diff --git a/vcf/test/issue-201.vcf.gz.tbi b/vcf/test/issue-201.vcf.gz.tbi
new file mode 100644
index 0000000000000000000000000000000000000000..23ffe4d9d4bb7aa713e12f966015680f9ad9e66b
GIT binary patch
literal 129
zcmb2|=3rp}f&Xj_PR>jWDGYCKZ4`8H5MaGv>%w)Rfvd+sY0}Bil{TU~4u?M6J9EGN
z4c)~4Tz^faS#xe3{ZTsGSoi!Zse6-6SMAjQ{Dui^6p(oE{Z;-|-cpXBY13618066`
Lk!E0qDP#ZufEFmv

literal 0
HcmV?d00001

diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index 729d6ee..8a496c8 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -1192,6 +1192,25 @@ def testFetchesAllFromChromIfOnlyChromSpecified(self):
         )
 
 
+@unittest.skipUnless(pysam, "test requires installation of PySAM.")
+class TestIssue201(unittest.TestCase):
+    def setUp(self):
+        # This file contains some non-ASCII characters in a UTF-8 encoding.
+        # https://github.com/jamescasbon/PyVCF/issues/201
+        self.reader = vcf.Reader(fh('issue-201.vcf.gz', 'rb'),
+                                 encoding='utf-8')
+
+    def testIterate(self):
+        for record in self.reader:
+            # Should not raise decoding errors.
+            pass
+
+    def testFetch(self):
+        for record in self.reader.fetch(chrom='17'):
+            # Should not raise decoding errors.
+            pass
+
+
 class TestOpenMethods(unittest.TestCase):
 
     samples = 'NA00001 NA00002 NA00003'.split()
@@ -1496,6 +1515,7 @@ def test_write_uncalled(self):
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestRecord))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestCall))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestFetch))
+suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestIssue201))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestOpenMethods))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestSampleFilter))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestFilter))

From 1cbbf58ea1ad745776d1ca5c55ee8151f7d725c4 Mon Sep 17 00:00:00 2001
From: Martijn Vermaat <martijn@vermaat.name>
Date: Thu, 8 Oct 2015 11:41:23 +0200
Subject: [PATCH 127/168] More robust parsing of meta-information lines

Fixes #210
---
 vcf/parser.py                |  9 +++++----
 vcf/test/parse-meta-line.vcf |  6 ++++++
 vcf/test/test_vcf.py         | 33 +++++++++++++++++++++++++++++++++
 3 files changed, 44 insertions(+), 4 deletions(-)
 create mode 100644 vcf/test/parse-meta-line.vcf

diff --git a/vcf/parser.py b/vcf/parser.py
index 1c14694..862fbee 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -182,15 +182,16 @@ def read_contig(self, contig_string):
         return (match.group('id'), contig)
 
     def read_meta_hash(self, meta_string):
-        items = re.split("[<>]", meta_string)
-        # Removing initial hash marks and final equal sign
-        key = items[0][2:-1]
+        # assert re.match("##.+=<", meta_string)
+        items = meta_string.split('=', 1)
+        # Removing initial hash marks
+        key = items[0].lstrip('#')
         # N.B., items can have quoted values, so cannot just split on comma
         val = OrderedDict()
         state = 0
         k = ''
         v = ''
-        for c in items[1]:
+        for c in items[1].strip('[<>]'):
 
             if state == 0:  # reading item key
                 if c == '=':
diff --git a/vcf/test/parse-meta-line.vcf b/vcf/test/parse-meta-line.vcf
new file mode 100644
index 0000000..e3a2611
--- /dev/null
+++ b/vcf/test/parse-meta-line.vcf
@@ -0,0 +1,6 @@
+##fileformat=VCFv4.1
+##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##MYFIELD=<ID=SomeField,Version=3.4-0-g7e26428,Date="Wed Oct 07 09:11:47 CEST 2015",Options="< 4 and > 3">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	Sample
+chr1	100	id1	G	A	.	.	NS=3	GT	0/1
diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index 8a496c8..63e972c 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -365,6 +365,38 @@ def test_write(self):
             self.assertEquals(l.INFO, r.INFO)
 
 
+class TestParseMetaLine(unittest.TestCase):
+    def test_parse(self):
+        reader = vcf.Reader(fh('parse-meta-line.vcf'))
+        f = reader.metadata['MYFIELD'][0]
+        self.assertEqual(f['ID'], 'SomeField')
+        self.assertEqual(f['Version'], '3.4-0-g7e26428')
+        self.assertEqual(f['Date'], '"Wed Oct 07 09:11:47 CEST 2015"')
+        self.assertEqual(f['Options'], '"< 4 and > 3"')
+        next(reader)
+
+    def test_write(self):
+        reader = vcf.Reader(fh('parse-meta-line.vcf'))
+        out = StringIO()
+        writer = vcf.Writer(out, reader)
+
+        records = list(reader)
+
+        for record in records:
+            writer.write_record(record)
+        out.seek(0)
+        reader2 = vcf.Reader(out)
+
+        f = reader2.metadata['MYFIELD'][0]
+        self.assertEqual(f['ID'], 'SomeField')
+        self.assertEqual(f['Version'], '3.4-0-g7e26428')
+        self.assertEqual(f['Date'], '"Wed Oct 07 09:11:47 CEST 2015"')
+        self.assertEqual(f['Options'], '"< 4 and > 3"')
+
+        for l, r in zip(records, reader2):
+            self.assertEquals(l.INFO, r.INFO)
+
+
 class TestGatkOutputWriter(unittest.TestCase):
 
     def testWrite(self):
@@ -1506,6 +1538,7 @@ def test_write_uncalled(self):
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestStringAsFlag))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestInfoOrder))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestInfoTypeCharacter))
+suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestParseMetaLine))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestGatkOutputWriter))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestBcfToolsOutputWriter))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestWriterDictionaryMeta))

From 0253183e46d8a6a41ed6f73f58530d0f011792a7 Mon Sep 17 00:00:00 2001
From: alexjironkin <alexjironkin@gmail.com>
Date: Fri, 23 Oct 2015 10:10:58 +0100
Subject: [PATCH 128/168] Precompiled patterns for improved performance.

Patterns for row and ALT encoding are now pre-compiled instead of being compiled each time re.split and re.search are called, which improves read performance.
---
 vcf/parser.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/vcf/parser.py b/vcf/parser.py
index 862fbee..cc7a38c 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -273,6 +273,9 @@ def __init__(self, fsock=None, filename=None, compressed=None, prepend_chr=False
             self._separator = '\t'
         else:
             self._separator = '\t| +'
+            
+        self._row_pattern = re.compile(self._separator)
+        self._alt_pattern = re.compile('[\[\]]')
 
         self.reader = (line.strip() for line in self._reader if line.strip())
 
@@ -507,9 +510,9 @@ def _parse_samples(self, samples, samp_fmt, site):
         return samp_data
 
     def _parse_alt(self, str):
-        if re.search('[\[\]]', str) is not None:
+        if self._alt_pattern.search(str) is not None:
             # Paired breakend
-            items = re.split('[\[\]]', str)
+            items = self._alt_pattern.split(str)
             remoteCoords = items[1].split(':')
             chr = remoteCoords[0]
             if chr[0] == '<':
@@ -537,7 +540,7 @@ def _parse_alt(self, str):
     def next(self):
         '''Return the next record in the file.'''
         line = self.reader.next()
-        row = re.split(self._separator, line.rstrip())
+        row = self._row_pattern.split(line.rstrip())
         chrom = row[0]
         if self._prepend_chr:
             chrom = 'chr' + chrom

From d15a375f55dcbdd37e20bf948827c879e5e63af3 Mon Sep 17 00:00:00 2001
From: redmar <R.R.vandenberg@nvwa.nl>
Date: Thu, 12 Nov 2015 14:27:04 +0100
Subject: [PATCH 129/168] Added vcf and testcases to demonstrate issue214

---
 vcf/test/issue-214.vcf | 32 ++++++++++++++++++++++++++++++++
 vcf/test/test_vcf.py   | 31 ++++++++++++++++++++++++++++++-
 2 files changed, 62 insertions(+), 1 deletion(-)
 create mode 100644 vcf/test/issue-214.vcf

diff --git a/vcf/test/issue-214.vcf b/vcf/test/issue-214.vcf
new file mode 100644
index 0000000..dbc5fac
--- /dev/null
+++ b/vcf/test/issue-214.vcf
@@ -0,0 +1,32 @@
+##fileformat=VCFv4.1
+##ALT=<ID=NON_REF,Description="Represents any possible alternative allele at this location">
+##FILTER=<ID=LowQual,Description="Low quality">
+##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum DP observed within the GVCF block">
+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">
+##FORMAT=<ID=RGQ,Number=1,Type=Integer,Description="Unconditional reference genotype confidence, encoded as a phred quality -10*log10 p(genotype call is wrong)">
+##FORMAT=<ID=SB,Number=4,Type=Integer,Description="Per-sample component statistics which comprise the Fisher's Exact Test to detect strand bias.">
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
+##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=BaseQRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt Vs. Ref base qualities">
+##INFO=<ID=ClippingRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref number of hard clipped bases">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered">
+##INFO=<ID=DS,Number=0,Type=Flag,Description="Were any of the samples downsampled?">
+##INFO=<ID=END,Number=1,Type=Integer,Description="Stop position of the interval">
+##INFO=<ID=FS,Number=1,Type=Float,Description="Phred-scaled p-value using Fisher's exact test to detect strand bias">
+##INFO=<ID=HaplotypeScore,Number=1,Type=Float,Description="Consistency of the site with at most two segregating haplotypes">
+##INFO=<ID=InbreedingCoeff,Number=1,Type=Float,Description="Inbreeding coefficient as estimated from the genotype likelihoods per-sample when compared against the Hardy-Weinberg expectation">
+##INFO=<ID=MLEAC,Number=A,Type=Integer,Description="Maximum likelihood expectation (MLE) for the allele counts (not necessarily the same as the AC), for each ALT allele, in the same order as listed">
+##INFO=<ID=MLEAF,Number=A,Type=Float,Description="Maximum likelihood expectation (MLE) for the allele frequency (not necessarily the same as the AF), for each ALT allele, in the same order as listed">
+##INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality">
+##INFO=<ID=MQRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities">
+##INFO=<ID=QD,Number=1,Type=Float,Description="Variant Confidence/Quality by Depth">
+##INFO=<ID=ReadPosRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt vs. Ref read position bias">
+##INFO=<ID=SOR,Number=1,Type=Float,Description="Symmetric Odds Ratio of 2x2 contingency table to detect strand bias">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	Sample1	Sample2
+1	456904	.	T	C,*	6162.77	.	AC=1,1;AF=8.333e-03,8.333e-03;AN=120;DP=7693;FS=0.000;MLEAC=1,1;MLEAF=8.333e-03,8.333e-03;MQ=60.00;QD=31.36;SOR=0.976	GT:AD:DP:GQ:PL	0:106,0,0:106:99:0,1800,1800	0:110,0,0:110:99:0,1800,1800
+1	456940	.	*	C,T	6162.77	.	AC=1,1;AF=8.333e-03,8.333e-03;AN=120;DP=7693;FS=0.000;MLEAC=1,1;MLEAF=8.333e-03,8.333e-03;MQ=60.00;QD=31.36;SOR=0.976	GT:AD:DP:GQ:PL	0:106,0,0:106:99:0,1800,1800	0:110,0,0:110:99:0,1800,1800
diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index 63e972c..8fbe35a 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -229,7 +229,35 @@ def testParse(self):
             for s in r.samples:
                 s.phased
 
-
+class TestIssue214(unittest.TestCase):
+    """ See https://github.com/jamescasbon/PyVCF/issues/214 """
+    
+    def test_issue_214_is_snp(self):
+        reader=vcf.Reader(fh('issue-214.vcf'))
+        r=reader.next()
+        self.assertTrue(r.is_snp)
+
+    def test_issue_214_var_type(self):
+        reader=vcf.Reader(fh('issue-214.vcf'))
+        r=reader.next()
+        self.assertEqual(r.var_type,'snp')
+
+    # Can the ref even be a spanning deletion?
+    # Note, this does not trigger issue 214, but I've added it here for completeness
+    def test_issue_214_ref_is_del_is_snp(self):
+        reader=vcf.Reader(fh('issue-214.vcf'))
+        reader.next()
+        r=reader.next()
+        self.assertTrue(r.is_snp)
+
+    # Can the ref even be a spanning deletion?
+    # Note, this does not trigger issue 214, but I've added it here for completeness
+    def test_issue_214_ref_is_del_var_type(self):
+        reader=vcf.Reader(fh('issue-214.vcf'))
+        reader.next()
+        r=reader.next()
+        self.assertEqual(r.var_type,'snp')
+        
 class Test1kg(unittest.TestCase):
 
     def testParse(self):
@@ -1532,6 +1560,7 @@ def test_write_uncalled(self):
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestFreebayesOutput))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestSamtoolsOutput))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestBcfToolsOutput))
+suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestIssue214))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(Test1kg))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(Test1kgSites))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestGoNL))

From 41bd5b8db06d9820b58f7da78425aa4c6d7445cd Mon Sep 17 00:00:00 2001
From: redmar <R.R.vandenberg@nvwa.nl>
Date: Thu, 12 Nov 2015 14:43:08 +0100
Subject: [PATCH 130/168] Resolved issue214 by adding '*' to the allowed
 characters

---
 vcf/model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vcf/model.py b/vcf/model.py
index f523c24..33c77b2 100644
--- a/vcf/model.py
+++ b/vcf/model.py
@@ -376,7 +376,7 @@ def is_snp(self):
         for alt in self.ALT:
             if alt is None or alt.type != "SNV":
                 return False
-            if alt not in ['A', 'C', 'G', 'T', 'N']:
+            if alt not in ['A', 'C', 'G', 'T', 'N', '*']:
                 return False
         return True
 

From 2fc8e8664b42d3b7a33d08d3bfecf1fa57d3083c Mon Sep 17 00:00:00 2001
From: alexjironkin <alexjironkin@gmail.com>
Date: Fri, 13 Nov 2015 14:57:34 +0000
Subject: [PATCH 131/168] Additional switch to pre-compiled re pattern.

Missed 1 split to be replaced with _row_pattern.split.
---
 vcf/parser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vcf/parser.py b/vcf/parser.py
index cc7a38c..f7fc569 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -349,7 +349,7 @@ def _parse_metainfo(self):
 
             line = self.reader.next()
 
-        fields = re.split(self._separator, line[1:])
+        fields = self._row_pattern.split(line[1:])
         self._column_headers = fields[:9]
         self.samples = fields[9:]
         self._sample_indexes = dict([(x,i) for (i,x) in enumerate(self.samples)])

From 240ae07a21e05c8c2496c1c3c5a25e7940620032 Mon Sep 17 00:00:00 2001
From: "B. Arman Aksoy" <arman@aksoy.org>
Date: Mon, 16 Nov 2015 17:32:01 -0500
Subject: [PATCH 132/168] handle empty sample columns better

---
 vcf/cparse.pyx       |  2 +-
 vcf/parser.py        |  4 ++--
 vcf/test/strelka.vcf | 57 ++++++++++++++++++++++++++++++++++++++++++++
 vcf/test/test_vcf.py | 11 +++++++--
 4 files changed, 69 insertions(+), 5 deletions(-)
 create mode 100644 vcf/test/strelka.vcf

diff --git a/vcf/cparse.pyx b/vcf/cparse.pyx
index a3cb4b3..8a71d64 100644
--- a/vcf/cparse.pyx
+++ b/vcf/cparse.pyx
@@ -39,7 +39,7 @@ def parse_samples(
             if samp_fmt._fields[j] == 'GT':
                 sampdat[j] = vals
                 continue
-            elif vals == '.':
+            elif not vals or vals == '.':
                 sampdat[j] = None
                 continue
 
diff --git a/vcf/parser.py b/vcf/parser.py
index f7fc569..a5625d7 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -273,7 +273,7 @@ def __init__(self, fsock=None, filename=None, compressed=None, prepend_chr=False
             self._separator = '\t'
         else:
             self._separator = '\t| +'
-            
+
         self._row_pattern = re.compile(self._separator)
         self._alt_pattern = re.compile('[\[\]]')
 
@@ -466,7 +466,7 @@ def _parse_samples(self, samples, samp_fmt, site):
                 if samp_fmt._fields[i] == 'GT':
                     sampdat[i] = vals
                     continue
-                elif vals == ".":
+                elif not vals or vals == ".":
                     sampdat[i] = None
                     continue
 
diff --git a/vcf/test/strelka.vcf b/vcf/test/strelka.vcf
new file mode 100644
index 0000000..b5aea76
--- /dev/null
+++ b/vcf/test/strelka.vcf
@@ -0,0 +1,57 @@
+##fileformat=VCFv4.1
+##FILTER=<ID=BCNoise,Description="Average fraction of filtered basecalls within 50 bases of the indel exceeds 0.3">
+##FILTER=<ID=QSI_ref,Description="Normal sample is not homozygous ref or sindel Q-score < 30, ie calls with NT!=ref or QSI_NT < 30">
+##FILTER=<ID=QSS_ref,Description="Normal sample is not homozygous ref or ssnv Q-score < 15, ie calls with NT!=ref or QSS_NT < 15">
+##FILTER=<ID=Repeat,Description="Sequence repeat of more than 8x in the reference sequence">
+##FILTER=<ID=SpanDel,Description="Fraction of reads crossing site with spanning deletions in either sample exceeeds 0.75">
+##FILTER=<ID=iHpol,Description="Indel overlaps an interrupted homopolymer longer than 14x in the reference sequence">
+##FORMAT=<ID=AU,Number=2,Type=Integer,Description="Number of 'A' alleles used in tiers 1,2">
+##FORMAT=<ID=CU,Number=2,Type=Integer,Description="Number of 'C' alleles used in tiers 1,2">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read depth for tier1">
+##FORMAT=<ID=DP2,Number=1,Type=Integer,Description="Read depth for tier2">
+##FORMAT=<ID=DP50,Number=1,Type=Float,Description="Average tier1 read depth within 50 bases">
+##FORMAT=<ID=FDP,Number=1,Type=Integer,Description="Number of basecalls filtered from original read depth for tier1">
+##FORMAT=<ID=FDP50,Number=1,Type=Float,Description="Average tier1 number of basecalls filtered from original read depth within 50 bases">
+##FORMAT=<ID=GU,Number=2,Type=Integer,Description="Number of 'G' alleles used in tiers 1,2">
+##FORMAT=<ID=SDP,Number=1,Type=Integer,Description="Number of reads with deletions spanning this site at tier1">
+##FORMAT=<ID=SUBDP,Number=1,Type=Integer,Description="Number of reads below tier1 mapping quality threshold aligned across this site">
+##FORMAT=<ID=SUBDP50,Number=1,Type=Float,Description="Average number of reads below tier1 mapping quality threshold aligned across sites within 50 bases">
+##FORMAT=<ID=TAR,Number=2,Type=Integer,Description="Reads strongly supporting alternate allele for tiers 1,2">
+##FORMAT=<ID=TIR,Number=2,Type=Integer,Description="Reads strongly supporting indel allele for tiers 1,2">
+##FORMAT=<ID=TOR,Number=2,Type=Integer,Description="Other reads (weak support or insufficient indel breakpoint overlap) for tiers 1,2">
+##FORMAT=<ID=TU,Number=2,Type=Integer,Description="Number of 'T' alleles used in tiers 1,2">
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
+##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=IC,Number=1,Type=Integer,Description="Number of times RU repeats in the indel allele">
+##INFO=<ID=IHP,Number=1,Type=Integer,Description="Largest reference interrupted homopolymer length intersecting with the indel">
+##INFO=<ID=NT,Number=1,Type=String,Description="Genotype of the normal in all data tiers, as used to classify somatic variants. One of {ref,het,hom,conflict}.">
+##INFO=<ID=OVERLAP,Number=0,Type=Flag,Description="Somatic indel possibly overlaps a second indel.">
+##INFO=<ID=QSI,Number=1,Type=Integer,Description="Quality score for any somatic variant, ie. for the ALT haplotype to be present at a significantly different frequency in the tumor and normal">
+##INFO=<ID=QSI_NT,Number=1,Type=Integer,Description="Quality score reflecting the joint probability of a somatic variant and NT">
+##INFO=<ID=QSS,Number=1,Type=Integer,Description="Quality score for any somatic snv, ie. for the ALT allele to be present at a significantly different frequency in the tumor and normal">
+##INFO=<ID=QSS_NT,Number=1,Type=Integer,Description="Quality score reflecting the joint probability of a somatic variant and NT">
+##INFO=<ID=RC,Number=1,Type=Integer,Description="Number of times RU repeats in the reference allele">
+##INFO=<ID=RU,Number=1,Type=String,Description="Smallest repeating sequence unit in inserted or deleted sequence">
+##INFO=<ID=SGT,Number=1,Type=String,Description="Most likely somatic genotype excluding normal noise states">
+##INFO=<ID=SOMATIC,Number=0,Type=Flag,Description="Somatic mutation">
+##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
+##INFO=<ID=TQSI,Number=1,Type=Integer,Description="Data tier used to compute QSI">
+##INFO=<ID=TQSI_NT,Number=1,Type=Integer,Description="Data tier used to compute QSI_NT">
+##INFO=<ID=TQSS,Number=1,Type=Integer,Description="Data tier used to compute QSS">
+##INFO=<ID=TQSS_NT,Number=1,Type=Integer,Description="Data tier used to compute QSS_NT">
+##INFO=<ID=set,Number=1,Type=String,Description="Source VCF for the merged record in CombineVariants">
+##content=strelka somatic indel calls
+##fileDate=20151113
+##germlineIndelTheta=0.0001
+##germlineSnvTheta=0.001
+##priorSomaticIndelRate=1e-06
+##priorSomaticSnvRate=1e-06
+##reference=file:///b37.fasta
+##source=strelka
+##source_version=2.0.17.strelka1
+##startTime=Fri Nov 13 19:38:43 2015
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NORMAL.variant	NORMAL.variant2	TUMOR.variant	TUMOR.variant2
+1	1666175	.	C	T	.	PASS	AC=0;AF=0.00;AN=0;NT=ref;QSS=28;QSS_NT=28;SGT=CC->CT;SOMATIC;TQSS=1;TQSS_NT=1;set=variant	AU:CU:DP:FDP:GU:SDP:SUBDP:TU	0,0:42,42:43:0:0,0:0:0:1,1		0,0:45,45:59:0:0,0:0:0:14,14	
+1	3750492	.	G	A	.	PASS	AC=0;AF=0.00;AN=0;NT=ref;QSS=38;QSS_NT=38;SGT=GG->AG;SOMATIC;TQSS=2;TQSS_NT=2;set=variant	AU:CU:DP:FDP:GU:SDP:SUBDP:TU	0,0:0,0:116:0:116,116:0:0:0,0		6,6:0,0:96:0:90,91:0:0:0,0	
+1	9117626	.	G	A	.	PASS	AC=0;AF=0.00;AN=0;NT=ref;QSS=32;QSS_NT=32;SGT=GG->AG;SOMATIC;TQSS=1;TQSS_NT=1;set=variant	AU:CU:DP:FDP:GU:SDP:SUBDP:TU	0,0:0,0:165:0:165,166:0:0:0,0		6,6:0,0:132:0:126,127:0:0:0,0	
\ No newline at end of file
diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index 8fbe35a..8ad3c03 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -231,7 +231,7 @@ def testParse(self):
 
 class TestIssue214(unittest.TestCase):
     """ See https://github.com/jamescasbon/PyVCF/issues/214 """
-    
+
     def test_issue_214_is_snp(self):
         reader=vcf.Reader(fh('issue-214.vcf'))
         r=reader.next()
@@ -257,7 +257,7 @@ def test_issue_214_ref_is_del_var_type(self):
         reader.next()
         r=reader.next()
         self.assertEqual(r.var_type,'snp')
-        
+
 class Test1kg(unittest.TestCase):
 
     def testParse(self):
@@ -1553,6 +1553,12 @@ def test_write_uncalled(self):
         for (in_line, out_line) in zip(in_lines, out_lines):
             self.assertEqual(in_line,out_line)
 
+class TestStrelka(unittest.TestCase):
+
+    def test_strelka(self):
+        reader = vcf.Reader(fh('strelka.vcf'))
+        n = reader.next()
+        assert n is not None
 
 
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestVcfSpecs))
@@ -1585,3 +1591,4 @@ def test_write_uncalled(self):
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestUtils))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestGATKMeta))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestUncalledGenotypes))
+suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestStrelka))

From 3c6d6744dee6844e3125f16a7f0ffb378fa13e3b Mon Sep 17 00:00:00 2001
From: Kaarel <krlk89@users.noreply.github.com>
Date: Sat, 19 Dec 2015 23:01:12 +0200
Subject: [PATCH 133/168] Update README.rst

---
 README.rst | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/README.rst b/README.rst
index a60c0c8..aa1e0bd 100644
--- a/README.rst
+++ b/README.rst
@@ -50,7 +50,7 @@ of key=value pairs are converted to Python dictionaries, with flags being given
 a ``True`` value. Integers and floats are handled exactly as you'd expect::
 
     >>> vcf_reader = vcf.Reader(open('vcf/test/example-4.0.vcf', 'r'))
-    >>> record = vcf_reader.next()
+    >>> record = next(vcf_reader)
     >>> print record.POS
     14370
     >>> print record.ALT
@@ -82,7 +82,7 @@ fields.  In case the FORMAT column does not exist, ``record.FORMAT`` is
 parsed sample column and ``record.genotype`` is a way of looking up genotypes
 by sample name::
 
-    >>> record = vcf_reader.next()
+    >>> record = next(vcf_reader)
     >>> for sample in record.samples:
     ...     print sample['GT']
     0|0
@@ -135,15 +135,14 @@ For example::
 ALT records are actually classes, so that you can interrogate them::
 
     >>> reader = vcf.Reader(open('vcf/test/example-4.1-bnd.vcf'))
-    >>> _ = reader.next(); row = reader.next()
+    >>> _ = next(reader); row = next(reader)
     >>> print row
     Record(CHROM=1, POS=2, REF=T, ALT=[T[2:3[])
     >>> bnd = row.ALT[0]
     >>> print bnd.withinMainAssembly, bnd.orientation, bnd.remoteOrientation, bnd.connectingSequence
     True False True T
 
-Random access is supported for files with tabix indexes.  Simply call fetch for the
-region you are interested in::
+Random access is supported for files with tabix indexes. This requires the pysam module as a dependency.  Simply call fetch for the region you are interested in::
 
     >>> vcf_reader = vcf.Reader(filename='vcf/test/tb.vcf.gz')
     >>> for record in vcf_reader.fetch('20', 1110696, 1230237):  # doctest: +SKIP

From 0a237900e12188d23385323f6ac5b1ad3d3eeeb7 Mon Sep 17 00:00:00 2001
From: Martijn Vermaat <martijn@vermaat.name>
Date: Tue, 22 Dec 2015 18:18:50 +0100
Subject: [PATCH 134/168] Test more Python versions on Travis CI

---
 .travis.yml | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 1fdfd54..b44e02c 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -6,8 +6,11 @@ python:
   - "3.2"
   - "3.3"
   - "3.4"
+  - "3.5"
+  - "nightly"
   - "pypy"
+  - "pypy3"
 install:
-  - "if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install -r requirements/python2.6-requirements.txt; elif [[ $TRAVIS_PYTHON_VERSION == 'pypy' ]]; then pip install -r requirements/pypy-requirements.txt; else pip install -r requirements/common-requirements.txt; fi"
+  - "if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install -r requirements/python2.6-requirements.txt; elif [[ $TRAVIS_PYTHON_VERSION == 'pypy' ]] || [[ $TRAVIS_PYTHON_VERSION == 'pypy3' ]]; then pip install -r requirements/pypy-requirements.txt; else pip install -r requirements/common-requirements.txt; fi"
   - python setup.py install
 script: python setup.py test

From 7448188c3ce7f5ab384d2786b06d9e2d28e93aa9 Mon Sep 17 00:00:00 2001
From: Martijn Vermaat <martijn@vermaat.name>
Date: Tue, 22 Dec 2015 18:20:54 +0100
Subject: [PATCH 135/168] Enable containerized builds on Travis CI

---
 .travis.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.travis.yml b/.travis.yml
index b44e02c..fc795a3 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,5 +1,6 @@
 # Validate this file using http://lint.travis-ci.org/
 language: python
+sudo: false
 python:
   - "2.6"
   - "2.7"

From 3385c4cc931b75e97eca2708f76a5211ceac37ff Mon Sep 17 00:00:00 2001
From: Martijn Vermaat <martijn@vermaat.name>
Date: Tue, 22 Dec 2015 18:24:05 +0100
Subject: [PATCH 136/168] Enable pip caching on Travis CI

---
 .travis.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.travis.yml b/.travis.yml
index fc795a3..658f857 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,6 +1,9 @@
 # Validate this file using http://lint.travis-ci.org/
 language: python
 sudo: false
+cache:
+  directories:
+    - $HOME/.cache/pip
 python:
   - "2.6"
   - "2.7"

From 1d9b4d6681874525ceb4b8fc1688088eeae960e3 Mon Sep 17 00:00:00 2001
From: Kaarel <krlk89@users.noreply.github.com>
Date: Wed, 23 Dec 2015 00:15:31 +0200
Subject: [PATCH 137/168] Update utils.py

---
 vcf/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vcf/utils.py b/vcf/utils.py
index 456e5fa..2881dc2 100644
--- a/vcf/utils.py
+++ b/vcf/utils.py
@@ -28,7 +28,7 @@ def walk_together(*readers, **kwargs):
     nexts = []
     for reader in readers:
         try:
-            nexts.append(reader.next())
+            nexts.append(next(reader))
         except StopIteration:
             nexts.append(None)
 

From 7b298a7dfa18ffcda1c1d0f2cbfbeb0323795370 Mon Sep 17 00:00:00 2001
From: Kaarel <krlk89@users.noreply.github.com>
Date: Wed, 23 Dec 2015 00:16:51 +0200
Subject: [PATCH 138/168] Update parser.py

---
 vcf/parser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vcf/parser.py b/vcf/parser.py
index a5625d7..e76150a 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -314,7 +314,7 @@ def _parse_metainfo(self):
 
         parser = _vcf_metadata_parser()
 
-        line = self.reader.next()
+        line = next(self.reader)
         while line.startswith('##'):
             self._header_lines.append(line)
 

From f7119cd0603ab065b3c6703350629c5402350f56 Mon Sep 17 00:00:00 2001
From: Kaarel <krlk89@users.noreply.github.com>
Date: Wed, 23 Dec 2015 00:18:05 +0200
Subject: [PATCH 139/168] Update test_vcf.py

---
 vcf/test/test_vcf.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index 8ad3c03..10b6c04 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -234,12 +234,12 @@ class TestIssue214(unittest.TestCase):
 
     def test_issue_214_is_snp(self):
         reader=vcf.Reader(fh('issue-214.vcf'))
-        r=reader.next()
+        r=next(reader)
         self.assertTrue(r.is_snp)
 
     def test_issue_214_var_type(self):
         reader=vcf.Reader(fh('issue-214.vcf'))
-        r=reader.next()
+        r=next(reader)
         self.assertEqual(r.var_type,'snp')
 
     # Can the ref even be a spanning deletion?

From 4733c85d2d85a9c8e36722affbcd1466fafc1716 Mon Sep 17 00:00:00 2001
From: Kaarel <krlk89@users.noreply.github.com>
Date: Wed, 23 Dec 2015 00:23:45 +0200
Subject: [PATCH 140/168] Update __init__.py

---
 vcf/__init__.py | 184 ------------------------------------------------
 1 file changed, 184 deletions(-)

diff --git a/vcf/__init__.py b/vcf/__init__.py
index 88842b6..c05058f 100644
--- a/vcf/__init__.py
+++ b/vcf/__init__.py
@@ -1,189 +1,5 @@
 #!/usr/bin/env python
-'''A VCFv4.0 and 4.1 parser for Python.
 
-Online version of PyVCF documentation is available at http://pyvcf.rtfd.org/
-
-The intent of this module is to mimic the ``csv`` module in the Python stdlib,
-as opposed to more flexible serialization formats like JSON or YAML.  ``vcf``
-will attempt to parse the content of each record based on the data types
-specified in the meta-information lines --  specifically the ##INFO and
-##FORMAT lines.  If these lines are missing or incomplete, it will check
-against the reserved types mentioned in the spec.  Failing that, it will just
-return strings.
-
-There main interface is the class: ``Reader``.  It takes a file-like
-object and acts as a reader::
-
-    >>> import vcf
-    >>> vcf_reader = vcf.Reader(open('vcf/test/example-4.0.vcf', 'r'))
-    >>> for record in vcf_reader:
-    ...     print record
-    Record(CHROM=20, POS=14370, REF=G, ALT=[A])
-    Record(CHROM=20, POS=17330, REF=T, ALT=[A])
-    Record(CHROM=20, POS=1110696, REF=A, ALT=[G, T])
-    Record(CHROM=20, POS=1230237, REF=T, ALT=[None])
-    Record(CHROM=20, POS=1234567, REF=GTCT, ALT=[G, GTACT])
-
-
-This produces a great deal of information, but it is conveniently accessed.
-The attributes of a Record are the 8 fixed fields from the VCF spec::
-
-    * ``Record.CHROM``
-    * ``Record.POS``
-    * ``Record.ID``
-    * ``Record.REF``
-    * ``Record.ALT``
-    * ``Record.QUAL``
-    * ``Record.FILTER``
-    * ``Record.INFO``
-
-plus attributes to handle genotype information:
-
-    * ``Record.FORMAT``
-    * ``Record.samples``
-    * ``Record.genotype``
-
-``samples`` and ``genotype``, not being the title of any column, are left lowercase.  The format
-of the fixed fields is from the spec.  Comma-separated lists in the VCF are
-converted to lists.  In particular, one-entry VCF lists are converted to
-one-entry Python lists (see, e.g., ``Record.ALT``).  Semicolon-delimited lists
-of key=value pairs are converted to Python dictionaries, with flags being given
-a ``True`` value. Integers and floats are handled exactly as you'd expect::
-
-    >>> vcf_reader = vcf.Reader(open('vcf/test/example-4.0.vcf', 'r'))
-    >>> record = vcf_reader.next()
-    >>> print record.POS
-    14370
-    >>> print record.ALT
-    [A]
-    >>> print record.INFO['AF']
-    [0.5]
-
-There are a number of convenience methods and properties for each ``Record`` allowing you to
-examine properties of interest::
-
-    >>> print record.num_called, record.call_rate, record.num_unknown
-    3 1.0 0
-    >>> print record.num_hom_ref, record.num_het, record.num_hom_alt
-    1 1 1
-    >>> print record.nucl_diversity, record.aaf, record.heterozygosity
-    0.6 [0.5] 0.5
-    >>> print record.get_hets()
-    [Call(sample=NA00002, CallData(GT=1|0, GQ=48, DP=8, HQ=[51, 51]))]
-    >>> print record.is_snp, record.is_indel, record.is_transition, record.is_deletion
-    True False True False
-    >>> print record.var_type, record.var_subtype
-    snp ts
-    >>> print record.is_monomorphic
-    False
-
-``record.FORMAT`` will be a string specifying the format of the genotype
-fields.  In case the FORMAT column does not exist, ``record.FORMAT`` is
-``None``.  Finally, ``record.samples`` is a list of dictionaries containing the
-parsed sample column and ``record.genotype`` is a way of looking up genotypes
-by sample name::
-
-    >>> record = vcf_reader.next()
-    >>> for sample in record.samples:
-    ...     print sample['GT']
-    0|0
-    0|1
-    0/0
-    >>> print record.genotype('NA00001')['GT']
-    0|0
-
-The genotypes are represented by ``Call`` objects, which have three attributes: the
-corresponding Record ``site``, the sample name in ``sample`` and a dictionary of
-call data in ``data``::
-
-     >>> call = record.genotype('NA00001')
-     >>> print call.site
-     Record(CHROM=20, POS=17330, REF=T, ALT=[A])
-     >>> print call.sample
-     NA00001
-     >>> print call.data
-     CallData(GT=0|0, GQ=49, DP=3, HQ=[58, 50])
-
-Please note that as of release 0.4.0, attributes known to have single values (such as
-``DP`` and ``GQ`` above) are returned as values.  Other attributes are returned
-as lists (such as ``HQ`` above).
-
-There are also a number of methods::
-
-    >>> print call.called, call.gt_type, call.gt_bases, call.phased
-    True 0 T|T True
-
-Metadata regarding the VCF file itself can be investigated through the
-following attributes:
-
-    * ``Reader.metadata``
-    * ``Reader.infos``
-    * ``Reader.filters``
-    * ``Reader.formats``
-    * ``Reader.samples``
-
-For example::
-
-    >>> vcf_reader.metadata['fileDate']
-    '20090805'
-    >>> vcf_reader.samples
-    ['NA00001', 'NA00002', 'NA00003']
-    >>> vcf_reader.filters
-    OrderedDict([('q10', Filter(id='q10', desc='Quality below 10')), ('s50', Filter(id='s50', desc='Less than 50% of samples have data'))])
-    >>> vcf_reader.infos['AA'].desc
-    'Ancestral Allele'
-
-ALT records are actually classes, so that you can interrogate them::
-
-    >>> reader = vcf.Reader(open('vcf/test/example-4.1-bnd.vcf'))
-    >>> _ = reader.next(); row = reader.next()
-    >>> print row
-    Record(CHROM=1, POS=2, REF=T, ALT=[T[2:3[])
-    >>> bnd = row.ALT[0]
-    >>> print bnd.withinMainAssembly, bnd.orientation, bnd.remoteOrientation, bnd.connectingSequence
-    True False True T
-
-The Reader supports retrieval of records within designated regions for
-files with tabix indexes via the fetch method. Pass in a chromosome,
-and, optionally, start and end coordinates, for the regions of
-interest::
-
-    >>> vcf_reader = vcf.Reader(filename='vcf/test/tb.vcf.gz')
-    >>> # fetch all records on chromosome 20 from base 1110696 through 1230237
-    >>> for record in vcf_reader.fetch('20', 1110695, 1230237):  # doctest: +SKIP
-    ...     print record
-    Record(CHROM=20, POS=1110696, REF=A, ALT=[G, T])
-    Record(CHROM=20, POS=1230237, REF=T, ALT=[None])
-
-Note that the start and end coordinates are in the zero-based, half-open
-coordinate system, similar to ``_Record.start`` and ``_Record.end``. The
-very first base of a chromosome is index 0, and the the region includes
-bases up to, but not including the base at the end coordinate. For
-example::
-
-    >>> # fetch all records on chromosome 4 from base 11 through 20
-    >>> vcf_reader.fetch('4', 10, 20)   # doctest: +SKIP
-
-would include all records overlapping a 10 base pair region from the
-11th base of through the 20th base (which is at index 19) of chromosome
-4. It would not include the 21st base (at index 20). (See
-http://genomewiki.ucsc.edu/index.php/Coordinate_Transforms for more
-information on the zero-based, half-open coordinate system.)
-
-The ``Writer`` class provides a way of writing a VCF file.  Currently, you must specify a
-template ``Reader`` which provides the metadata::
-
-    >>> vcf_reader = vcf.Reader(filename='vcf/test/tb.vcf.gz')
-    >>> vcf_writer = vcf.Writer(open('/dev/null', 'w'), vcf_reader)
-    >>> for record in vcf_reader:
-    ...     vcf_writer.write_record(record)
-
-
-An extensible script is available to filter vcf files in vcf_filter.py.  VCF filters
-declared by other packages will be available for use in this script.  Please
-see :doc:`FILTERS` for full description.
-
-'''
 from vcf.parser import Reader, Writer
 from vcf.parser import VCFReader, VCFWriter
 from vcf.filters import Base as Filter

From 24fc5fe310a01093bc3cc52e32564de5fb128373 Mon Sep 17 00:00:00 2001
From: Martijn Vermaat <martijn@vermaat.name>
Date: Thu, 24 Dec 2015 11:11:54 +0100
Subject: [PATCH 141/168] More _.next() to next(_) changes

---
 vcf/parser.py        |  4 ++--
 vcf/test/test_vcf.py | 24 ++++++++++++------------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/vcf/parser.py b/vcf/parser.py
index e76150a..2cd8deb 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -347,7 +347,7 @@ def _parse_metainfo(self):
                         self.metadata[key] = []
                     self.metadata[key].append(val)
 
-            line = self.reader.next()
+            line = next(self.reader)
 
         fields = self._row_pattern.split(line[1:])
         self._column_headers = fields[:9]
@@ -539,7 +539,7 @@ def _parse_alt(self, str):
 
     def next(self):
         '''Return the next record in the file.'''
-        line = self.reader.next()
+        line = next(self.reader)
         row = self._row_pattern.split(line.rstrip())
         chrom = row[0]
         if self._prepend_chr:
diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index 10b6c04..20b71ad 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -246,16 +246,16 @@ def test_issue_214_var_type(self):
     # Note, this does not trigger issue 214, but I've added it here for completeness
     def test_issue_214_ref_is_del_is_snp(self):
         reader=vcf.Reader(fh('issue-214.vcf'))
-        reader.next()
-        r=reader.next()
+        next(reader)
+        r=next(reader)
         self.assertTrue(r.is_snp)
 
     # Can the ref even be a spanning deletion?
     # Note, this does not trigger issue 214, but I've added it here for completeness
     def test_issue_214_ref_is_del_var_type(self):
         reader=vcf.Reader(fh('issue-214.vcf'))
-        reader.next()
-        r=reader.next()
+        next(reader)
+        r=next(reader)
         self.assertEqual(r.var_type,'snp')
 
 class Test1kg(unittest.TestCase):
@@ -562,7 +562,7 @@ def test_num_calls(self):
             self.assertEqual(len(var.samples), num_calls)
 
     def test_dunder_eq(self):
-        rec = vcf.Reader(fh('example-4.0.vcf')).next()
+        rec = next(vcf.Reader(fh('example-4.0.vcf')))
         self.assertFalse(rec == None)
         self.assertFalse(None == rec)
 
@@ -892,7 +892,7 @@ def test_qual(self):
 
     def test_info_multiple_values(self):
         reader = vcf.Reader(fh('example-4.1-info-multiple-values.vcf'))
-        var = reader.next()
+        var = next(reader)
         # check Float type INFO field with multiple values
         expected = [19.3, 47.4, 14.0]
         actual = var.INFO['RepeatCopies']
@@ -1149,7 +1149,7 @@ class TestCall(unittest.TestCase):
 
     def test_dunder_eq(self):
         reader = vcf.Reader(fh('example-4.0.vcf'))
-        var = reader.next()
+        var = next(reader)
         example_call = var.samples[0]
         self.assertFalse(example_call == None)
         self.assertFalse(None == example_call)
@@ -1320,7 +1320,7 @@ def testCLIWithFilter(self):
         #print(buf.getvalue())
         reader = vcf.Reader(buf)
         self.assertEqual(reader.samples, ['NA00001'])
-        rec = reader.next()
+        rec = next(reader)
         self.assertEqual(len(rec.samples), 1)
 
     @unittest.skipUnless(IS_NOT_PYPY, "test broken for PyPy")
@@ -1342,7 +1342,7 @@ def testSampleFilterModule(self):
         # read output
         reader = vcf.Reader(buf)
         self.assertEqual(reader.samples, ['NA00001'])
-        rec = reader.next()
+        rec = next(reader)
         self.assertEqual(len(rec.samples), 1)
 
 
@@ -1401,7 +1401,7 @@ class TestRegression(unittest.TestCase):
 
     def test_issue_16(self):
         reader = vcf.Reader(fh('issue-16.vcf'))
-        n = reader.next()
+        n = next(reader)
         assert n.QUAL == None
 
     def test_null_mono(self):
@@ -1416,7 +1416,7 @@ def test_null_mono(self):
         out.seek(0)
         print(out.getvalue())
         p2 = vcf.Reader(out)
-        rec = p2.next()
+        rec = next(p2)
         assert rec.samples
 
 
@@ -1557,7 +1557,7 @@ class TestStrelka(unittest.TestCase):
 
     def test_strelka(self):
         reader = vcf.Reader(fh('strelka.vcf'))
-        n = reader.next()
+        n = next(reader)
         assert n is not None
 
 

From 95c907ecfa5ac9ba93ff722331cee2d53c4f36d0 Mon Sep 17 00:00:00 2001
From: Martijn Vermaat <martijn@vermaat.name>
Date: Thu, 24 Dec 2015 11:14:51 +0100
Subject: [PATCH 142/168] Misc doc updates

---
 README.rst       | 26 ++++++++++++++++--------
 docs/API.rst     | 14 ++++++-------
 docs/HISTORY.rst | 51 ++++++++++++++++++++++++------------------------
 docs/INTRO.rst   |  3 +--
 vcf/__init__.py  |  6 ++++++
 vcf/model.py     | 15 +++++++-------
 6 files changed, 65 insertions(+), 50 deletions(-)

diff --git a/README.rst b/README.rst
index aa1e0bd..67b5d1b 100644
--- a/README.rst
+++ b/README.rst
@@ -58,7 +58,7 @@ a ``True`` value. Integers and floats are handled exactly as you'd expect::
     >>> print record.INFO['AF']
     [0.5]
 
-There are a number of convienience methods and properties for each ``Record`` allowing you to
+There are a number of convenience methods and properties for each ``Record`` allowing you to
 examine properties of interest::
 
     >>> print record.num_called, record.call_rate, record.num_unknown
@@ -142,19 +142,31 @@ ALT records are actually classes, so that you can interrogate them::
     >>> print bnd.withinMainAssembly, bnd.orientation, bnd.remoteOrientation, bnd.connectingSequence
     True False True T
 
-Random access is supported for files with tabix indexes. This requires the pysam module as a dependency.  Simply call fetch for the region you are interested in::
+The Reader supports retrieval of records within designated regions for files
+with tabix indexes via the fetch method. This requires the pysam module as a
+dependency. Pass in a chromosome, and, optionally, start and end coordinates,
+for the regions of interest::
 
     >>> vcf_reader = vcf.Reader(filename='vcf/test/tb.vcf.gz')
-    >>> for record in vcf_reader.fetch('20', 1110696, 1230237):  # doctest: +SKIP
+    >>> # fetch all records on chromosome 20 from base 1110696 through 1230237
+    >>> for record in vcf_reader.fetch('20', 1110695, 1230237):  # doctest: +SKIP
     ...     print record
     Record(CHROM=20, POS=1110696, REF=A, ALT=[G, T])
     Record(CHROM=20, POS=1230237, REF=T, ALT=[None])
 
-Or extract a single row::
+Note that the start and end coordinates are in the zero-based, half-open
+coordinate system, similar to ``_Record.start`` and ``_Record.end``. The very
+first base of a chromosome is index 0, and the region includes bases up
+to, but not including the base at the end coordinate. For example::
 
-    >>> print vcf_reader.fetch('20', 1110696)  # doctest: +SKIP
-    Record(CHROM=20, POS=1110696, REF=A, ALT=[G, T])
+    >>> # fetch all records on chromosome 4 from base 11 through 20
+    >>> vcf_reader.fetch('4', 10, 20)   # doctest: +SKIP
 
+would include all records overlapping a 10 base pair region from the 11th base
+through the 20th base (which is at index 19) of chromosome 4. It would not
+include the 21st base (at index 20). (See
+http://genomewiki.ucsc.edu/index.php/Coordinate_Transforms for more
+information on the zero-based, half-open coordinate system.)
 
 The ``Writer`` class provides a way of writing a VCF file.  Currently, you must specify a
 template ``Reader`` which provides the metadata::
@@ -164,8 +176,6 @@ template ``Reader`` which provides the metadata::
     >>> for record in vcf_reader:
     ...     vcf_writer.write_record(record)
 
-
 An extensible script is available to filter vcf files in vcf_filter.py.  VCF filters
 declared by other packages will be available for use in this script.  Please
 see :doc:`FILTERS` for full description.
-
diff --git a/docs/API.rst b/docs/API.rst
index 7ffc21a..d688893 100644
--- a/docs/API.rst
+++ b/docs/API.rst
@@ -14,43 +14,43 @@ vcf.Writer
    :members:
 
 vcf.model._Record
------------
+-----------------
 
 .. autoclass:: vcf.model._Record
    :members:
 
 vcf.model._Call
----------
+---------------
 
 .. autoclass:: vcf.model._Call
    :members:
 
 vcf.model._AltRecord
------------
+--------------------
 
 .. autoclass:: vcf.model._AltRecord
    :members:
 
 vcf.model._Substitution
------------
+-----------------------
 
 .. autoclass:: vcf.model._Substitution
    :members:
 
 vcf.model._SV
------------
+-------------
 
 .. autoclass:: vcf.model._SV
    :members:
 
 vcf.model._SingleBreakend
------------
+-------------------------
 
 .. autoclass:: vcf.model._SingleBreakend
    :members:
 
 vcf.model._Breakend
------------
+-------------------
 
 .. autoclass:: vcf.parser._Breakend
    :members:
diff --git a/docs/HISTORY.rst b/docs/HISTORY.rst
index defff0d..8a97d8d 100644
--- a/docs/HISTORY.rst
+++ b/docs/HISTORY.rst
@@ -2,7 +2,7 @@ Development
 ===========
 
 Please use the `PyVCF repository <https://github.com/jamescasbon/PyVCF/>`_.
-Pull requests gladly accepted. 
+Pull requests gladly accepted.
 Issues should be reported at the github issue tracker.
 
 Running tests
@@ -10,7 +10,7 @@ Running tests
 
 Please check the tests by running them with::
 
-    python setup.py test 
+    python setup.py test
 
 New features should have test code sent with them.
 
@@ -20,7 +20,7 @@ Changes
 0.6.7 Release
 -------------
 
-* Include missing .pyx files 
+* Include missing .pyx files
 
 0.6.6 Release
 -------------
@@ -56,17 +56,17 @@ Changes
 -------------
 
 * cython port of #79
-* correct writing of meta lines #84 
+* correct writing of meta lines #84
 
 0.6.2 Release
 -------------
 
-* issues #78, #79 (thanks Sean, Brad) 
+* issues #78, #79 (thanks Sean, Brad)
 
 0.6.1 Release
 -------------
 
-* Add strict whitespace mode for well formed VCFs with spaces 
+* Add strict whitespace mode for well formed VCFs with spaces
   in sample names (thanks Marco)
 * Ignore blank lines in files (thanks Martijn)
 * Tweaks for handling missing data (thanks Sean)
@@ -76,9 +76,9 @@ Changes
 0.6.0 Release
 -------------
 
-* Backwards incompatible change: _Call.data is now a 
+* Backwards incompatible change: _Call.data is now a
   namedtuple (previously it was a dict)
-* Optional cython version, much improved performance.  
+* Optional cython version, much improved performance.
 * Improvements to writer (thanks @cmclean)
 * Improvements to inheritance of classes (thanks @lennax)
 
@@ -86,14 +86,14 @@ Changes
 0.5.0 Release
 -------------
 
-VCF 4.1 support: 
- * support missing genotype #28 (thanks @martijnvermaat)
- * parseALT for svs #42, #48 (thanks @dzerbino)
+* VCF 4.1 support:
+  - support missing genotype #28 (thanks @martijnvermaat)
+  - parseALT for svs #42, #48 (thanks @dzerbino)
 * `trim_common_suffix` method #22 (thanks @martijnvermaat)
 * Multiple metadata with the same key is stored (#52)
-Writer improvements
- * A/G in Number INFO fields #53 (thanks @lennax) 
- * Better output #55 (thanks @cmclean)
+* Writer improvements:
+  - A/G in Number INFO fields #53 (thanks @lennax)
+  - Better output #55 (thanks @cmclean)
 * Allow malformed INFO fields #49 (thanks @ilyaminkin)
 * Added bayes factor error bias VCF filter
 * Added docs on vcf_melt
@@ -103,14 +103,14 @@ Writer improvements
 0.4.6 Release
 -------------
 
-* Performance improvements (#47) 
+* Performance improvements (#47)
 * Preserve order of INFO column (#46)
 
 0.4.5 Release
 -------------
 
-* Support exponent syntax qual values (#43, #44) (thanks @martijnvermaat) 
-* Preserve order of header lines (#45) 
+* Support exponent syntax qual values (#43, #44) (thanks @martijnvermaat)
+* Preserve order of header lines (#45)
 
 0.4.4 Release
 -------------
@@ -139,15 +139,15 @@ Writer improvements
 0.4.0 Release
 -------------
 
-* Package structure 
+* Package structure
 * add ``vcf.utils`` module with ``walk_together`` method
-* samtools tests 
+* samtools tests
 * support Freebayes' non standard '.' for no call
-* fix vcf_melt  
+* fix vcf_melt
 * support monomorphic sites, add ``is_monomorphic`` method, handle null QUALs
-* filter support for files with monomorphic calls 
+* filter support for files with monomorphic calls
 * Values declared as single are no-longer returned in lists
-* several performance improvements 
+* several performance improvements
 
 
 0.3.0 Release
@@ -170,14 +170,14 @@ Documentation release
 
 * Add shebang to vcf_filter.py
 
-0.2 Release 
+0.2 Release
 -----------
 
 * Replace genotype dictionary with a ``Call`` object
 * Methods on ``Record`` and ``Call`` (thanks @arq5x)
 * Shortcut parse_sample when genotype is None
 
-0.1 Release 
+0.1 Release
 -----------
 
 * Added test code
@@ -188,7 +188,7 @@ Documentation release
 * Allow opening by filename as well as filesocket
 * Support fetching rows for tabixed indexed files
 * Performance improvements (see ``test/prof.py``)
-* Added extensible filter script (see FILTERS.md), vcf_filter.py 
+* Added extensible filter script (see FILTERS.md), vcf_filter.py
 
 Contributions
 =============
@@ -197,4 +197,3 @@ Project started by @jdoughertyii and taken over by @jamescasbon on 12th January
 Contributions from @arq5x, @brentp, @martijnvermaat, @ian1roberts, @marcelm.
 
 This project was supported by `Population Genetics <http://www.populationgenetics.com/>`_.
-
diff --git a/docs/INTRO.rst b/docs/INTRO.rst
index b61e9a9..2b9a587 100644
--- a/docs/INTRO.rst
+++ b/docs/INTRO.rst
@@ -1,5 +1,4 @@
 Introduction
 ============
 
-.. automodule:: vcf
-
+.. include:: ../README.rst
diff --git a/vcf/__init__.py b/vcf/__init__.py
index c05058f..75bee03 100644
--- a/vcf/__init__.py
+++ b/vcf/__init__.py
@@ -1,4 +1,10 @@
 #!/usr/bin/env python
+"""
+A VCFv4.0 and 4.1 parser for Python.
+
+Online version of PyVCF documentation is available at http://pyvcf.rtfd.org/
+"""
+
 
 from vcf.parser import Reader, Writer
 from vcf.parser import VCFReader, VCFWriter
diff --git a/vcf/model.py b/vcf/model.py
index 33c77b2..ef1edb7 100644
--- a/vcf/model.py
+++ b/vcf/model.py
@@ -330,7 +330,7 @@ def nucl_diversity(self):
 
         Derived from:
         \"Population Genetics: A Concise Guide, 2nd ed., p.45\"
-          John Gillespie.
+        John Gillespie.
         """
         # skip if more than one alternate allele. assumes bi-allelic
         if len(self.ALT) > 1:
@@ -467,13 +467,14 @@ def var_type(self):
     def var_subtype(self):
         """
         Return the subtype of variant.
+
         - For SNPs and INDELs, yeild one of: [ts, tv, ins, del]
-        - For SVs yield either "complex" or the SV type defined
-          in the ALT fields (removing the brackets).
-          E.g.:
-               <DEL>       -> DEL
-               <INS:ME:L1> -> INS:ME:L1
-               <DUP>       -> DUP
+        - For SVs yield either "complex" or the SV type defined in the ALT
+          fields (removing the brackets). E.g.::
+
+              <DEL>       -> DEL
+              <INS:ME:L1> -> INS:ME:L1
+              <DUP>       -> DUP
 
         The logic is meant to follow the rules outlined in the following
         paragraph at:

From bfcedb9bad1a14074ac4526ffdb610611e073810 Mon Sep 17 00:00:00 2001
From: James Casbon <casbon@gmail.com>
Date: Fri, 18 Mar 2016 16:21:45 +0000
Subject: [PATCH 143/168] Cut release.

---
 setup.py        | 2 +-
 vcf/__init__.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index a266207..d8089c0 100644
--- a/setup.py
+++ b/setup.py
@@ -68,7 +68,7 @@
         'Programming Language :: Cython',
         'Programming Language :: Python',
         'Programming Language :: Python :: 2',
-        'Programming Language :: Python :: 2.6'
+        'Programming Language :: Python :: 2.6',
         'Programming Language :: Python :: 2.7',
         'Programming Language :: Python :: 3',
         'Programming Language :: Python :: 3.2',
diff --git a/vcf/__init__.py b/vcf/__init__.py
index 75bee03..e1aae58 100644
--- a/vcf/__init__.py
+++ b/vcf/__init__.py
@@ -12,4 +12,4 @@
 from vcf.parser import RESERVED_INFO, RESERVED_FORMAT
 from vcf.sample_filter import SampleFilter
 
-VERSION = '0.6.8.dev0'
+VERSION = '0.6.8'

From 0b24f4d8b74b2f6fa6f29812ceed4f0645726813 Mon Sep 17 00:00:00 2001
From: Juan Medina <jsmedmar@gmail.com>
Date: Wed, 15 Jun 2016 11:22:57 -0400
Subject: [PATCH 144/168] allows  to match empty values. Fixes #234

---
 vcf/parser.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vcf/parser.py b/vcf/parser.py
index 2cd8deb..af6aeae 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -83,7 +83,7 @@ def __init__(self):
         super(_vcf_metadata_parser, self).__init__()
         self.info_pattern = re.compile(r'''\#\#INFO=<
             ID=(?P<id>[^,]+),\s*
-            Number=(?P<number>-?\d+|\.|[AGR]),\s*
+            Number=(?P<number>(?:(-?\d+|\.|[AGR]))?),\s*
             Type=(?P<type>Integer|Float|Flag|Character|String),\s*
             Description="(?P<desc>[^"]*)"
             (?:,\s*Source="(?P<source>[^"]*)")?
@@ -112,7 +112,7 @@ def __init__(self):
 
     def vcf_field_count(self, num_str):
         """Cast vcf header numbers to integer or None"""
-        if num_str is None:
+        if num_str is None or num_str == "":
             return None
         elif num_str not in field_counts:
             # Fixed, specified number

From 0da96626427bcc23f280b819dfba28ebe7bab18a Mon Sep 17 00:00:00 2001
From: Juan Medina <jsmedmar@gmail.com>
Date: Wed, 15 Jun 2016 15:43:48 -0400
Subject: [PATCH 145/168] Issue #234: simplifies regex and adds test case

---
 vcf/parser.py        |  4 ++--
 vcf/test/test_vcf.py | 17 +++++++++++++++++
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/vcf/parser.py b/vcf/parser.py
index af6aeae..e23a66b 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -83,7 +83,7 @@ def __init__(self):
         super(_vcf_metadata_parser, self).__init__()
         self.info_pattern = re.compile(r'''\#\#INFO=<
             ID=(?P<id>[^,]+),\s*
-            Number=(?P<number>(?:(-?\d+|\.|[AGR]))?),\s*
+            Number=(?P<number>-?\d+|\.|[AGR])?,\s*
             Type=(?P<type>Integer|Float|Flag|Character|String),\s*
             Description="(?P<desc>[^"]*)"
             (?:,\s*Source="(?P<source>[^"]*)")?
@@ -112,7 +112,7 @@ def __init__(self):
 
     def vcf_field_count(self, num_str):
         """Cast vcf header numbers to integer or None"""
-        if num_str is None or num_str == "":
+        if num_str is None:
             return None
         elif num_str not in field_counts:
             # Fixed, specified number
diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index 20b71ad..afc90ec 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -1271,6 +1271,22 @@ def testFetch(self):
             pass
 
 
+class TestIssue234(unittest.TestCase):
+    """ See https://github.com/jamescasbon/PyVCF/issues/234 """
+
+    def test_vcf_metadata_parser_doesnt_break_with_empty_number_tags(self):
+        parser = vcf.parser._vcf_metadata_parser()
+        num_str = '##INFO=<ID=CA,Number=,Type=Flag,Description="Position '
+        num_str += 'could not be annotated to a coding region of a transcript '
+        num_str += 'using the supplied bed file">'
+        try:
+            parser.read_info(num_str)
+        except SyntaxError:
+            msg = "vcf.parser._vcf_metadata_parser shouldn't raise SyntaxError"
+            msg += " if Number tag is empty."
+            self.fail(msg)
+
+
 class TestOpenMethods(unittest.TestCase):
 
     samples = 'NA00001 NA00002 NA00003'.split()
@@ -1584,6 +1600,7 @@ def test_strelka(self):
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestCall))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestFetch))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestIssue201))
+suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestIssue234))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestOpenMethods))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestSampleFilter))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestFilter))

From 4275b18224bf4a07c5bd09d7389872ae0d80c72b Mon Sep 17 00:00:00 2001
From: Juan Medina <jsmedmar@gmail.com>
Date: Wed, 15 Jun 2016 15:51:46 -0400
Subject: [PATCH 146/168] Asserts that num attribute is None for #234

---
 vcf/test/test_vcf.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index afc90ec..2064bf8 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -1280,7 +1280,8 @@ def test_vcf_metadata_parser_doesnt_break_with_empty_number_tags(self):
         num_str += 'could not be annotated to a coding region of a transcript '
         num_str += 'using the supplied bed file">'
         try:
-            parser.read_info(num_str)
+            info = parser.read_info(num_str)[1]
+            self.assertIsNone(info.num)
         except SyntaxError:
             msg = "vcf.parser._vcf_metadata_parser shouldn't raise SyntaxError"
             msg += " if Number tag is empty."

From 21a52d2d8f71516b02c3e119d745dfd8f097f02b Mon Sep 17 00:00:00 2001
From: rwness <rwness@gmail.com>
Date: Tue, 28 Jun 2016 23:16:55 -0400
Subject: [PATCH 147/168] Update parser.py

The error raised by read_alt for a malformed ALT header line said "FILTER";
I think this was a typo, and it gives a misleading error message.
---
 vcf/parser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vcf/parser.py b/vcf/parser.py
index e23a66b..cff1adf 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -151,7 +151,7 @@ def read_alt(self, alt_string):
         match = self.alt_pattern.match(alt_string)
         if not match:
             raise SyntaxError(
-                "One of the FILTER lines is malformed: %s" % alt_string)
+                "One of the ALT lines is malformed: %s" % alt_string)
 
         alt = _Alt(match.group('id'), match.group('desc'))
 

From f40482793fdbc9034cb905bef4aef8464ce02dbb Mon Sep 17 00:00:00 2001
From: bow <bow@bow.web.id>
Date: Thu, 21 Jul 2016 19:03:02 +0200
Subject: [PATCH 148/168] Fixes for GitHub issue #243

---
 vcf/model.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vcf/model.py b/vcf/model.py
index ef1edb7..5adf2b7 100644
--- a/vcf/model.py
+++ b/vcf/model.py
@@ -23,7 +23,7 @@ def __init__(self, site, sample, data):
         #: Namedtuple of data from the VCF file
         self.data = data
 
-        if hasattr(self.data, 'GT'):
+        if getattr(self.data, 'GT', None) is not None:
             self.gt_alleles = [(al if al != '.' else None) for al in allele_delimiter.split(self.data.GT)]
             self.ploidity = len(self.gt_alleles)
             self.called = all([al != None for al in self.gt_alleles])
@@ -279,7 +279,7 @@ def genotype(self, name):
     @property
     def num_called(self):
         """ The number of called samples"""
-        return sum(s.called for s in self.samples)
+        return sum(1 for s in self.samples if s.called)
 
     @property
     def call_rate(self):

From c6ee46e1a81de389bca0b5b6791c10867efd2241 Mon Sep 17 00:00:00 2001
From: trijntje <R.R.vandenberg@xs4all.nl>
Date: Sun, 31 Jul 2016 12:17:54 +0200
Subject: [PATCH 149/168] Moved code into Reader._parse_filter so it can be
 reused for FT

---
 vcf/parser.py | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/vcf/parser.py b/vcf/parser.py
index cff1adf..9e40474 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -359,6 +359,15 @@ def _map(self, func, iterable, bad='.'):
         return [func(x) if x != bad else None
                 for x in iterable]
 
+    def _parse_filter(self, filt_str):
+        '''Parse the FILTER field of a VCF entry into a Python list'''
+        if filt_str == '.':
+            return None
+        elif filt_str == 'PASS':
+            return []
+        else:
+            return filt_str.split(';')
+
     def _parse_info(self, info_str):
         '''Parse the INFO field of a VCF entry into a dictionary of Python
         types.
@@ -562,13 +571,7 @@ def next(self):
             except ValueError:
                 qual = None
 
-        filt = row[6]
-        if filt == '.':
-            filt = None
-        elif filt == 'PASS':
-            filt = []
-        else:
-            filt = filt.split(';')
+        filt = self._parse_filter(row[6])
         info = self._parse_info(row[7])
 
         try:

From d703113b98e0988b0502369e4e40938cb4516d41 Mon Sep 17 00:00:00 2001
From: trijntje <R.R.vandenberg@xs4all.nl>
Date: Sun, 31 Jul 2016 12:35:11 +0200
Subject: [PATCH 150/168] Added example file and 2 testcases

---
 vcf/test/FT.vcf      | 50 ++++++++++++++++++++++++++++++++++++++++++++
 vcf/test/test_vcf.py | 32 ++++++++++++++++++++++++++++
 2 files changed, 82 insertions(+)
 create mode 100644 vcf/test/FT.vcf

diff --git a/vcf/test/FT.vcf b/vcf/test/FT.vcf
new file mode 100644
index 0000000..e42436a
--- /dev/null
+++ b/vcf/test/FT.vcf
@@ -0,0 +1,50 @@
+##fileformat=VCFv4.2
+##ALT=<ID=NON_REF,Description="Represents any possible alternative allele at this location">
+##FILTER=<ID=DP125,Description="DP<125">
+##FILTER=<ID=DP130,Description="DP<130">
+##FILTER=<ID=LowQual,Description="Low quality">
+##FILTER=<ID=Q4800,Description="QUAL<4800.0">
+##FILTER=<ID=Q5000,Description="QUAL<5000.0">
+##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">
+##FORMAT=<ID=FT,Number=.,Type=String,Description="Genotype-level filter">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum DP observed within the GVCF block">
+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">
+##FORMAT=<ID=RGQ,Number=1,Type=Integer,Description="Unconditional reference genotype confidence, encoded as a phred quality -10*log10 p(genotype call is wrong)">
+##FORMAT=<ID=SB,Number=4,Type=Integer,Description="Per-sample component statistics which comprise the Fisher's Exact Test to detect strand bias.">
+##GATKCommandLine.VariantFiltration=<ID=VariantFiltration,Version=3.6-0-g89b7209,Date="Wed Jul 27 09:50:44 CEST 2016",Epoch=1469605844963,CommandLineOptions="analysis_type=VariantFiltration input_file=[] showFullBamList=false read_buffer_size=null read_filter=[] disable_read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=../ref.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 refactor_NDN_cigar_string=false fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 static_quantized_quals=null round_down_quantized=false disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false no_cmdline_in_header=false sites_only=false never_trim_vcf_format_field=false bcf=false bam_compression=null simplifyBAM=false disable_bam_indexing=false generate_md5=false num_threads=1 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false variant_index_type=DYNAMIC_SEEK variant_index_parameter=-1 reference_window_stop=0 phone_home= gatk_key=null tag=NA logging_level=INFO log_to_file=null help=false version=false variant=(RodBinding name=variant source=10.vcf) mask=(RodBinding name= source=UNBOUND) out=/home/redmar/tmp/example/snps/10_filt.vcf filterExpression=[QUAL<5000.0, QUAL<4800.0] 
filterName=[Q5000, Q4800] genotypeFilterExpression=[DP<130, DP<125] genotypeFilterName=[DP130, DP125] clusterSize=3 clusterWindowSize=0 maskExtension=0 maskName=Mask filterNotInMask=false missingValuesInExpressionsShouldEvaluateAsFailing=false invalidatePreviousFilters=false invertFilterExpression=false invertGenotypeFilterExpression=false setFilteredGtToNocall=false filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false">
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
+##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=BaseQRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt Vs. Ref base qualities">
+##INFO=<ID=ClippingRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref number of hard clipped bases">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered">
+##INFO=<ID=DS,Number=0,Type=Flag,Description="Were any of the samples downsampled?">
+##INFO=<ID=END,Number=1,Type=Integer,Description="Stop position of the interval">
+##INFO=<ID=ExcessHet,Number=1,Type=Float,Description="Phred-scaled p-value for exact test of excess heterozygosity">
+##INFO=<ID=FS,Number=1,Type=Float,Description="Phred-scaled p-value using Fisher's exact test to detect strand bias">
+##INFO=<ID=HaplotypeScore,Number=1,Type=Float,Description="Consistency of the site with at most two segregating haplotypes">
+##INFO=<ID=InbreedingCoeff,Number=1,Type=Float,Description="Inbreeding coefficient as estimated from the genotype likelihoods per-sample when compared against the Hardy-Weinberg expectation">
+##INFO=<ID=MLEAC,Number=A,Type=Integer,Description="Maximum likelihood expectation (MLE) for the allele counts (not necessarily the same as the AC), for each ALT allele, in the same order as listed">
+##INFO=<ID=MLEAF,Number=A,Type=Float,Description="Maximum likelihood expectation (MLE) for the allele frequency (not necessarily the same as the AF), for each ALT allele, in the same order as listed">
+##INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality">
+##INFO=<ID=MQRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities">
+##INFO=<ID=QD,Number=1,Type=Float,Description="Variant Confidence/Quality by Depth">
+##INFO=<ID=RAW_MQ,Number=1,Type=Float,Description="Raw data for RMS Mapping Quality">
+##INFO=<ID=ReadPosRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt vs. Ref read position bias">
+##INFO=<ID=SOR,Number=1,Type=Float,Description="Symmetric Odds Ratio of 2x2 contingency table to detect strand bias">
+##contig=<ID=ref,length=4888768>
+##reference=file://../ref.fasta
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	1	2	3	4	5
+ref	63393	.	C	A	29454.60	.	AC=5;AF=1.00;AN=5;DP=719;FS=0.000;MLEAC=5;MLEAF=1.00;MQ=60.00;QD=29.67;SOR=0.965	GT:AD:DP:GQ:PL	1:0,166:166:99:6740,0	1:0,142:142:99:5824,0	1:0,134:134:99:5616,0	1:0,122:122:99:4930,0	1:0,155:155:99:6371,0
+ref	65903	.	AATTGCGCTG	A	7340.57	PASS	AC=1;AF=0.200;AN=5;DP=524;FS=0.000;MLEAC=1;MLEAF=0.200;MQ=60.00;QD=34.04;SOR=1.091	GT:AD:DP:FT:GQ:PL	1:0,164:164:PASS:99:7383,0	0:95,0:95:DP125;DP130:99:0,1800	0:88,0:88:DP125;DP130:99:0,1800	0:87,0:87:DP125;DP130:99:0,1800	0:89,0:89:DP125;DP130:99:0,1800
+ref	70837	.	C	A	4711.61	Q4800;Q5000	AC=1;AF=0.200;AN=5;DP=512;FS=0.000;MLEAC=1;MLEAF=0.200;MQ=60.00;QD=27.64;SOR=0.726	GT:AD:DP:FT:GQ:PL	0:121,0:121:DP125;DP130:99:0,1800	0:95,0:95:DP125;DP130:99:0,1800	1:0,120:120:DP125;DP130:99:4745,0	0:87,0:87:DP125;DP130:99:0,1800	0:89,0:89:DP125;DP130:99:0,1800
+ref	71448	.	C	T	31134.60	PASS	AC=5;AF=1.00;AN=5;BaseQRankSum=2.22;ClippingRankSum=0.00;DP=768;FS=0.000;MLEAC=5;MLEAF=1.00;MQ=60.00;MQRankSum=0.00;QD=29.43;ReadPosRankSum=2.03;SOR=0.295	GT:AD:DP:FT:GQ:PL	1:0,147:147:PASS:99:5996,0	1:1,183:184:PASS:99:7501,0	1:0,113:113:DP125;DP130:99:4559,0	1:0,161:161:PASS:99:6436,0	1:0,163:163:PASS:99:6669,0
+ref	104257	.	C	T	5521.61	PASS	AC=1;AF=0.200;AN=5;DP=506;FS=0.000;MLEAC=1;MLEAF=0.200;MQ=60.00;QD=29.45;SOR=0.854	GT:AD:DP:FT:GQ:PL	0:101,0:101:DP125;DP130:99:0,1800	0:109,0:109:DP125;DP130:99:0,1800	1:0,132:132:PASS:99:5555,0	0:67,0:67:DP125;DP130:99:0,1800	0:97,0:97:DP125;DP130:99:0,1800
+ref	140658	.	C	A	32467.60	PASS	AC=5;AF=1.00;AN=5;BaseQRankSum=2.24;ClippingRankSum=0.00;DP=801;FS=0.000;MLEAC=5;MLEAF=1.00;MQ=60.00;MQRankSum=0.00;QD=29.65;ReadPosRankSum=1.27;SOR=0.346	GT:AD:DP:GQ:PL	1:0,170:170:99:6854,0	1:0,198:198:99:8098,0	1:0,136:136:99:5554,0	1:0,141:141:99:5661,0	1:1,155:156:99:6327,0
+ref	147463	.	C	A	4885.61	Q5000	AC=1;AF=0.200;AN=5;BaseQRankSum=-7.720e-01;ClippingRankSum=0.00;DP=503;FS=0.000;MLEAC=1;MLEAF=0.200;MQ=60.00;MQRankSum=0.00;QD=35.03;ReadPosRankSum=-6.950e-01;SOR=0.278	GT:AD:DP:FT:GQ:PL	0:97,0:97:DP125;DP130:99:0,1800	0:104,0:104:DP125;DP130:99:0,1800	0:84,0:84:DP125;DP130:99:0,1800	1:1,128:129:DP130:99:4919,0	0:89,0:89:DP125;DP130:99:0,1800
+ref	154578	.	A	G	32015.60	PASS	AC=5;AF=1.00;AN=5;DP=776;FS=0.000;MLEAC=5;MLEAF=1.00;MQ=60.00;QD=25.82;SOR=0.902	GT:AD:DP:GQ:PL	1:0,152:152:99:6300,0	1:0,183:183:99:7608,0	1:0,137:137:99:5713,0	1:0,148:148:99:6040,0	1:0,156:156:99:6381,0
+ref	203200	.	C	T	30880.60	PASS	AC=5;AF=1.00;AN=5;DP=752;FS=0.000;MLEAC=5;MLEAF=1.00;MQ=60.00;QD=29.65;SOR=0.878	GT:AD:DP:FT:GQ:PL	1:0,161:161:PASS:99:6708,0	1:0,185:185:PASS:99:7602,0	1:0,136:136:PASS:99:5602,0	1:0,126:126:DP130:99:5080,0	1:0,144:144:PASS:99:5915,0
+ref	231665	.	C	T	30074.60	PASS	AC=5;AF=1.00;AN=5;DP=735;FS=0.000;MLEAC=5;MLEAF=1.00;MQ=60.00;QD=33.23;SOR=0.938	GT:AD:DP:FT:GQ:PL	1:0,191:191:PASS:99:7867,0	1:0,159:159:PASS:99:6431,0	1:0,130:130:PASS:99:5299,0	1:0,129:129:DP130:99:5290,0	1:0,126:126:DP130:99:5214,0
diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index 2064bf8..b0c3ef8 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -1288,6 +1288,37 @@ def test_vcf_metadata_parser_doesnt_break_with_empty_number_tags(self):
             self.fail(msg)
 
 
+class TestIssue246(unittest.TestCase):
+    """ See https://github.com/jamescasbon/PyVCF/issues/246 """
+
+    def test_FT_pass_two(self):
+        reader=vcf.Reader(fh('FT.vcf'))
+        next(reader)
+        r=next(reader)
+        target=[
+            [],
+            ['DP125','DP130'],
+            ['DP125','DP130'],
+            ['DP125','DP130'],
+            ['DP125','DP130']
+        ]
+        result=[call.data.FT for call in r.samples]
+        self.assertEqual(target,result)
+
+    def test_FT_one_two(self):
+        reader=list(vcf.Reader(fh('FT.vcf')))
+        r=reader[6]
+        target=[
+            ['DP125','DP130'],
+            ['DP125','DP130'],
+            ['DP125','DP130'],
+            ['DP130'],
+            ['DP125','DP130']
+        ]
+        result=[call.data.FT for call in r.samples]
+        self.assertEqual(target,result)
+            
+
 class TestOpenMethods(unittest.TestCase):
 
     samples = 'NA00001 NA00002 NA00003'.split()
@@ -1602,6 +1633,7 @@ def test_strelka(self):
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestFetch))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestIssue201))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestIssue234))
+suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestIssue246))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestOpenMethods))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestSampleFilter))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestFilter))

From 2f5777efd5eedd3f534f5c4037fdf1c6416efde4 Mon Sep 17 00:00:00 2001
From: trijntje <R.R.vandenberg@xs4all.nl>
Date: Sun, 31 Jul 2016 12:54:05 +0200
Subject: [PATCH 151/168] Added FT as a special case when parsing format

---
 vcf/cparse.pyx | 17 +++++++++++++++++
 vcf/parser.py  | 10 +++++++++-
 2 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/vcf/cparse.pyx b/vcf/cparse.pyx
index 8a71d64..d6a93ae 100644
--- a/vcf/cparse.pyx
+++ b/vcf/cparse.pyx
@@ -9,6 +9,19 @@ INTEGER = 'Integer'
 FLOAT = 'Float'
 NUMERIC = 'Numeric'
 
+def _parse_filter(filt_str):
+    '''Parse the FILTER field of a VCF entry into a Python list
+
+    NOTE: this method has a python equivalent and care must be taken
+    to keep the two methods equivalent
+    '''
+    if filt_str == '.':
+        return None
+    elif filt_str == 'PASS':
+        return []
+    else:
+        return filt_str.split(';')
+
 def parse_samples(
         list names, list samples, samp_fmt,
         list samp_fmt_types, list samp_fmt_nums, site):
@@ -39,6 +52,10 @@ def parse_samples(
             if samp_fmt._fields[j] == 'GT':
                 sampdat[j] = vals
                 continue
+            # genotype filters are a special case
+            elif samp_fmt._fields[j] == 'FT':
+                sampdat[j] = _parse_filter(vals)
+                continue
             elif not vals or vals == '.':
                 sampdat[j] = None
                 continue
diff --git a/vcf/parser.py b/vcf/parser.py
index 9e40474..5e7816b 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -360,7 +360,11 @@ def _map(self, func, iterable, bad='.'):
                 for x in iterable]
 
     def _parse_filter(self, filt_str):
-        '''Parse the FILTER field of a VCF entry into a Python list'''
+        '''Parse the FILTER field of a VCF entry into a Python list
+
+        NOTE: this method has a cython equivalent and care must be taken
+        to keep the two methods equivalent
+        '''
         if filt_str == '.':
             return None
         elif filt_str == 'PASS':
@@ -475,6 +479,10 @@ def _parse_samples(self, samples, samp_fmt, site):
                 if samp_fmt._fields[i] == 'GT':
                     sampdat[i] = vals
                     continue
+                # genotype filters are a special case
+                elif samp_fmt._fields[i] == 'FT':
+                    sampdat[i] = self._parse_filter(vals)
+                    continue
                 elif not vals or vals == ".":
                     sampdat[i] = None
                     continue

From 7d675ad367414acd255830764325cbfd715c3d1b Mon Sep 17 00:00:00 2001
From: Redmar <redmar@ubuntu.com>
Date: Mon, 1 Aug 2016 13:33:50 +0200
Subject: [PATCH 152/168] Implemented is_filt for _Call and _Record

---
 vcf/model.py         | 29 +++++++++++++++++++++++++++++
 vcf/test/test_vcf.py | 36 ++++++++++++++++++++++++++++++++++++
 2 files changed, 65 insertions(+)

diff --git a/vcf/model.py b/vcf/model.py
index 5adf2b7..535286c 100644
--- a/vcf/model.py
+++ b/vcf/model.py
@@ -117,6 +117,22 @@ def is_het(self):
             return None
         return self.gt_type == 1
 
+    @property
+    def is_filt(self):
+        """ Return True for filtered calls """
+        try: # no FT annotation present for this variant
+            FT=self.data.FT
+        except AttributeError:
+            return False
+        if FT == None or FT == []: # FT is not set or set to PASS
+            return False
+        elif len(FT) > 0: # FT contains one or more filters
+            return True
+        else: # This should not happen
+            raise RuntimeError(
+                "Parsing error for FT annotation in {}, "\
+                "please file a bug".format(self))
+
 
 class _Record(object):
     """ A set of calls at a site.  Equivalent to a row in a VCF file.
@@ -536,6 +552,19 @@ def is_monomorphic(self):
         """ Return True for reference calls """
         return len(self.ALT) == 1 and self.ALT[0] is None
 
+    @property
+    def is_filt(self,call=None):
+        """ Return True if a variant has been filtered """
+        FT=self.FILTER
+        if FT == None or FT == []: # FT is not set or set to PASS
+            return False
+        elif len(FT) > 0: # FT contains one or more filters
+            return True
+        else: # This should not happen
+            raise RuntimeError(
+                "Parsing error for FILTER annotation in {}, "\
+                "please file a bug".format(self))
+
 
 class _AltRecord(object):
     '''An alternative allele record: either replacement string, SV placeholder, or breakend'''
diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index b0c3ef8..4e62acb 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -1319,6 +1319,41 @@ def test_FT_one_two(self):
         self.assertEqual(target,result)
             
 
+class TestIsFilt(unittest.TestCase):
+    """ Test is_filt property for _Call and _Record """
+
+    def test_is_filt_record(self):
+        reader = vcf.Reader(fh('FT.vcf'))
+        target = [
+            False, False, True, False, False,
+            False, True, False, False, False
+        ]
+        result = [record.is_filt for record in reader]
+        self.assertEqual(target,result)
+
+    def test_is_filt_call_unset(self):
+        reader = vcf.Reader(fh('FT.vcf'))
+        record = next(reader)
+        target = [False]*5
+        result = [call.is_filt for call in record]
+        self.assertEqual(target,result)
+
+    def test_is_filt_call_pass_two(self):
+        reader = vcf.Reader(fh('FT.vcf'))
+        next(reader)
+        record = next(reader)
+        target = [False, True, True, True, True]
+        result = [call.is_filt for call in record]
+        self.assertEqual(target,result)
+
+    def test_is_filt_call_one(self):
+        reader = list(vcf.Reader(fh('FT.vcf')))
+        record = reader[6]
+        target = [True]*5
+        result = [call.is_filt for call in record]
+        self.assertEqual(target,result)
+
+
 class TestOpenMethods(unittest.TestCase):
 
     samples = 'NA00001 NA00002 NA00003'.split()
@@ -1634,6 +1669,7 @@ def test_strelka(self):
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestIssue201))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestIssue234))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestIssue246))
+suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestIsFilt))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestOpenMethods))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestSampleFilter))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestFilter))

From 301f8fe4921c7cef3f4f1886fe4fba3fa693bbd6 Mon Sep 17 00:00:00 2001
From: Redmar <redmar@ubuntu.com>
Date: Sat, 13 Aug 2016 11:41:32 +0200
Subject: [PATCH 153/168] Minor code cleanup

---
 vcf/model.py         | 24 ++++++++----------------
 vcf/test/test_vcf.py | 14 +++++++-------
 2 files changed, 15 insertions(+), 23 deletions(-)

diff --git a/vcf/model.py b/vcf/model.py
index 535286c..e6a8339 100644
--- a/vcf/model.py
+++ b/vcf/model.py
@@ -118,20 +118,16 @@ def is_het(self):
         return self.gt_type == 1
 
     @property
-    def is_filt(self):
+    def is_filtered(self):
         """ Return True for filtered calls """
         try: # no FT annotation present for this variant
-            FT=self.data.FT
+            filt = self.data.FT
         except AttributeError:
             return False
-        if FT == None or FT == []: # FT is not set or set to PASS
+        if filt is None or len(filt) == 0: # FT is not set or set to PASS
             return False
-        elif len(FT) > 0: # FT contains one or more filters
+        else:
             return True
-        else: # This should not happen
-            raise RuntimeError(
-                "Parsing error for FT annotation in {}, "\
-                "please file a bug".format(self))
 
 
 class _Record(object):
@@ -553,17 +549,13 @@ def is_monomorphic(self):
         return len(self.ALT) == 1 and self.ALT[0] is None
 
     @property
-    def is_filt(self,call=None):
+    def is_filtered(self):
         """ Return True if a variant has been filtered """
-        FT=self.FILTER
-        if FT == None or FT == []: # FT is not set or set to PASS
+        filt = self.FILTER
+        if filt is None or len(filt) == 0: # FILTER is not set or set to PASS
             return False
-        elif len(FT) > 0: # FT contains one or more filters
+        else:
             return True
-        else: # This should not happen
-            raise RuntimeError(
-                "Parsing error for FILTER annotation in {}, "\
-                "please file a bug".format(self))
 
 
 class _AltRecord(object):
diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index 4e62acb..a21b588 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -1319,8 +1319,8 @@ def test_FT_one_two(self):
         self.assertEqual(target,result)
             
 
-class TestIsFilt(unittest.TestCase):
-    """ Test is_filt property for _Call and _Record """
+class TestIsFiltered(unittest.TestCase):
+    """ Test is_filtered property for _Call and _Record """
 
     def test_is_filt_record(self):
         reader = vcf.Reader(fh('FT.vcf'))
@@ -1328,14 +1328,14 @@ def test_is_filt_record(self):
             False, False, True, False, False,
             False, True, False, False, False
         ]
-        result = [record.is_filt for record in reader]
+        result = [record.is_filtered for record in reader]
         self.assertEqual(target,result)
 
     def test_is_filt_call_unset(self):
         reader = vcf.Reader(fh('FT.vcf'))
         record = next(reader)
         target = [False]*5
-        result = [call.is_filt for call in record]
+        result = [call.is_filtered for call in record]
         self.assertEqual(target,result)
 
     def test_is_filt_call_pass_two(self):
@@ -1343,14 +1343,14 @@ def test_is_filt_call_pass_two(self):
         next(reader)
         record = next(reader)
         target = [False, True, True, True, True]
-        result = [call.is_filt for call in record]
+        result = [call.is_filtered for call in record]
         self.assertEqual(target,result)
 
     def test_is_filt_call_one(self):
         reader = list(vcf.Reader(fh('FT.vcf')))
         record = reader[6]
         target = [True]*5
-        result = [call.is_filt for call in record]
+        result = [call.is_filtered for call in record]
         self.assertEqual(target,result)
 
 
@@ -1669,7 +1669,7 @@ def test_strelka(self):
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestIssue201))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestIssue234))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestIssue246))
-suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestIsFilt))
+suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestIsFiltered))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestOpenMethods))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestSampleFilter))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestFilter))

From de3e2e9cb25511f6cd6dcd3a4e04e89b4f0b81d1 Mon Sep 17 00:00:00 2001
From: Redmar <redmar@ubuntu.com>
Date: Mon, 22 Aug 2016 11:21:51 +0200
Subject: [PATCH 154/168] Added support for writing FT annotations

---
 vcf/parser.py | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/vcf/parser.py b/vcf/parser.py
index 5e7816b..029b18a 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -753,10 +753,26 @@ def _format_sample(self, fmt, sample):
             gt = './.' if 'GT' in fmt else ''
 
         if not gt:
-            return ':'.join([self._stringify(x) for x in sample.data])
+            result = []
+            for field in sample.data._fields:
+                value = getattr(sample.data,field)
+                if field == 'FT':
+                    result.append(self._format_filter(value))
+                else:
+                    result.append(self._stringify(value))
+            return ':'.join(result)
         # Following the VCF spec, GT is always the first item whenever it is present.
         else:
-            return ':'.join([gt] + [self._stringify(x) for x in sample.data[1:]])
+            result = []
+            for field in sample.data._fields:
+                value = getattr(sample.data,field)
+                if field == 'GT':
+                    continue
+                if field == 'FT':
+                    result.append(self._format_filter(value))
+                else:
+                    result.append(self._stringify(value))
+            return ':'.join([gt] + result)
 
     def _stringify(self, x, none='.', delim=','):
         if type(x) == type([]):

From ea86881578be594823055f95c16c72390a287a2e Mon Sep 17 00:00:00 2001
From: Redmar <redmar@ubuntu.com>
Date: Mon, 5 Sep 2016 08:39:08 +0200
Subject: [PATCH 155/168] Removed code duplication

---
 vcf/parser.py | 30 ++++++++++--------------------
 1 file changed, 10 insertions(+), 20 deletions(-)

diff --git a/vcf/parser.py b/vcf/parser.py
index 029b18a..fbdaf25 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -752,27 +752,17 @@ def _format_sample(self, fmt, sample):
         else:
             gt = './.' if 'GT' in fmt else ''
 
-        if not gt:
-            result = []
-            for field in sample.data._fields:
-                value = getattr(sample.data,field)
-                if field == 'FT':
-                    result.append(self._format_filter(value))
-                else:
-                    result.append(self._stringify(value))
-            return ':'.join(result)
+        result = [gt] if gt else []
         # Following the VCF spec, GT is always the first item whenever it is present.
-        else:
-            result = []
-            for field in sample.data._fields:
-                value = getattr(sample.data,field)
-                if field == 'GT':
-                    continue
-                if field == 'FT':
-                    result.append(self._format_filter(value))
-                else:
-                    result.append(self._stringify(value))
-            return ':'.join([gt] + result)
+        for field in sample.data._fields:
+            value = getattr(sample.data,field)
+            if field == 'GT':
+                continue
+            if field == 'FT':
+                result.append(self._format_filter(value))
+            else:
+                result.append(self._stringify(value))
+        return ':'.join(result)
 
     def _stringify(self, x, none='.', delim=','):
         if type(x) == type([]):

From abe72f5e4690a7068745cc3d6e8e91a3d720518c Mon Sep 17 00:00:00 2001
From: Adam Novak <anovak@soe.ucsc.edu>
Date: Wed, 19 Oct 2016 14:18:28 -0700
Subject: [PATCH 156/168] Fix docstring spelling

---
 vcf/parser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vcf/parser.py b/vcf/parser.py
index fbdaf25..00a7666 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -78,7 +78,7 @@
 
 
 class _vcf_metadata_parser(object):
-    '''Parse the metadat in the header of a VCF file.'''
+    '''Parse the metadata in the header of a VCF file.'''
     def __init__(self):
         super(_vcf_metadata_parser, self).__init__()
         self.info_pattern = re.compile(r'''\#\#INFO=<

From f4d719fb8584fd91ffb51ba6436baed9e553aec4 Mon Sep 17 00:00:00 2001
From: rwness <rwness@gmail.com>
Date: Wed, 9 Mar 2016 09:40:05 -0500
Subject: [PATCH 157/168] Correct indel definition

1. Corrected RECORD.is_indel to not call reference sites as indels
2. Corrected RECORD.is_deletion to not call reference sites as deletions
3. Added deletion site to vcf/test/example-4.0.vcf
4. Added deletion site to vcf/test/walk_left.vcf
5. Corrected tests to account for site 1230237 not being an indel or
deletion
6. Added tests for new site 1231234 which is an actual deletion
---
 vcf/model.py             |  4 ++--
 vcf/test/example-4.0.vcf |  1 +
 vcf/test/test_vcf.py     | 46 ++++++++++++++++++++++++++++++++++------
 vcf/test/walk_left.vcf   |  1 +
 4 files changed, 44 insertions(+), 8 deletions(-)

diff --git a/vcf/model.py b/vcf/model.py
index e6a8339..375a3f8 100644
--- a/vcf/model.py
+++ b/vcf/model.py
@@ -401,7 +401,7 @@ def is_indel(self):
             return True
         for alt in self.ALT:
             if alt is None:
-                return True
+                return False
             if alt.type != "SNV" and alt.type != "MNV":
                 return False
             elif len(alt) != len(self.REF):
@@ -452,7 +452,7 @@ def is_deletion(self):
             # just one alt allele
             alt_allele = self.ALT[0]
             if alt_allele is None:
-                return True
+                return False
             if len(self.REF) > len(alt_allele):
                 return True
             else:
diff --git a/vcf/test/example-4.0.vcf b/vcf/test/example-4.0.vcf
index 27803a1..97fb07e 100644
--- a/vcf/test/example-4.0.vcf
+++ b/vcf/test/example-4.0.vcf
@@ -20,4 +20,5 @@
 20	17330	.	T	A	3.0	q10	NS=3;DP=11;AF=0.017	GT:GQ:DP:HQ	0|0:49:3:58,50	0|1:3:5:65,3	0/0:41:3
 20	1110696	rs6040355	A	G,T	1e+03	PASS	NS=2;DP=10;AF=0.333,0.667;AA=T;DB	GT:GQ:DP:HQ	1|2:21:6:23,27	2|1:2:0:18,2	2/2:35:4
 20	1230237	.	T	.	47	PASS	NS=3;DP=13;AA=T	GT:GQ:DP:HQ	0|0:54:7:56,60	0|0:48:4:51,51	0/0:61:2
+20	1231234	.	AT	A	46	PASS	NS=3;DP=15;AA=A	GT:GQ:DP:HQ	1|1:23:7:26,30	0|0:27:9:56,60	0|0:31:10:65,71
 20	1234567	microsat1	GTCT	G,GTACT	.	PASS	NS=3;DP=9;AA=G	GT:GQ:DP	./.:35:4	0/2:17:2	1/1:40:3
diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index a21b588..a47f4fa 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -578,6 +578,8 @@ def test_call_rate(self):
                 self.assertEqual(3.0/3.0, call_rate)
             if var.POS == 1230237:
                 self.assertEqual(3.0/3.0, call_rate)
+            if var.POS == 1231234:
+                self.assertEqual(3.0/3.0, call_rate)
             elif var.POS == 1234567:
                 self.assertEqual(2.0/3.0, call_rate)
 
@@ -593,6 +595,8 @@ def test_aaf(self):
                 self.assertEqual([2.0/6.0, 4.0/6.0], aaf)
             if var.POS == 1230237:
                 self.assertEqual([0.0/6.0], aaf)
+            if var.POS == 1231234:
+                self.assertEqual([2.0/6.0], aaf)
             elif var.POS == 1234567:
                 self.assertEqual([2.0/4.0, 1.0/4.0], aaf)
         reader = vcf.Reader(fh('example-4.1-ploidy.vcf'))
@@ -615,6 +619,8 @@ def test_pi(self):
                 self.assertEqual(None, pi)
             if var.POS == 1230237:
                 self.assertEqual(0.0/6.0, pi)
+            if var.POS == 1231234:
+                self.assertEqual((6.0/(6.0-1))*(2.0*(1.0/3.0)*(2.0/3.0)) , pi)
             elif var.POS == 1234567:
                 self.assertEqual(None, pi)
 
@@ -630,6 +636,8 @@ def test_heterozygosity(self):
                 self.assertEqual(4.0/9.0, het)
             if var.POS == 1230237:
                 self.assertEqual(0.0, het)
+            if var.POS == 1231234:
+                self.assertEqual(4.0/9.0, het)
             elif var.POS == 1234567:
                 self.assertEqual(5.0/8.0, het)
 
@@ -650,6 +658,8 @@ def test_is_snp(self):
                 self.assertEqual(True, is_snp)
             if var.POS == 1230237:
                 self.assertEqual(False, is_snp)
+            if var.POS == 1231234:
+                self.assertEqual(False, is_snp)
             elif var.POS == 1234567:
                 self.assertEqual(False, is_snp)
 
@@ -682,6 +692,8 @@ def test_is_indel(self):
             if var.POS == 1110696:
                 self.assertEqual(False, is_indel)
             if var.POS == 1230237:
+                self.assertEqual(False, is_indel)
+            if var.POS == 1231234:
                 self.assertEqual(True, is_indel)
             elif var.POS == 1234567:
                 self.assertEqual(True, is_indel)
@@ -698,6 +710,8 @@ def test_is_transition(self):
                 self.assertEqual(False, is_trans)
             if var.POS == 1230237:
                 self.assertEqual(False, is_trans)
+            if var.POS == 1231234:
+                self.assertEqual(False, is_trans)
             elif var.POS == 1234567:
                 self.assertEqual(False, is_trans)
 
@@ -712,6 +726,8 @@ def test_is_deletion(self):
             if var.POS == 1110696:
                 self.assertEqual(False, is_del)
             if var.POS == 1230237:
+                self.assertEqual(False, is_del)
+            if var.POS == 1231234:
                 self.assertEqual(True, is_del)
             elif var.POS == 1234567:
                 self.assertEqual(False, is_del)
@@ -727,6 +743,8 @@ def test_var_type(self):
             if var.POS == 1110696:
                 self.assertEqual("snp", type)
             if var.POS == 1230237:
+                self.assertEqual("unknown", type)
+            if var.POS == 1231234:
                 self.assertEqual("indel", type)
             elif var.POS == 1234567:
                 self.assertEqual("indel", type)
@@ -759,6 +777,8 @@ def test_var_subtype(self):
             if var.POS == 1110696:
                 self.assertEqual("unknown", subtype)
             if var.POS == 1230237:
+                self.assertEqual("unknown", subtype)
+            if var.POS == 1231234:
                 self.assertEqual("del", subtype)
             elif var.POS == 1234567:
                 self.assertEqual("unknown", subtype)
@@ -807,6 +827,8 @@ def test_is_sv(self):
                 self.assertEqual(False, is_sv)
             if var.POS == 1230237:
                 self.assertEqual(False, is_sv)
+            if var.POS == 1231234:
+                self.assertEqual(False, is_sv)
             elif var.POS == 1234567:
                 self.assertEqual(False, is_sv)
 
@@ -838,6 +860,8 @@ def test_is_sv_precise(self):
                 self.assertEqual(False, is_precise)
             if var.POS == 1230237:
                 self.assertEqual(False, is_precise)
+            if var.POS == 1231234:
+                self.assertEqual(False, is_precise)
             elif var.POS == 1234567:
                 self.assertEqual(False, is_precise)
 
@@ -869,6 +893,8 @@ def test_sv_end(self):
                 self.assertEqual(None, sv_end)
             if var.POS == 1230237:
                 self.assertEqual(None, sv_end)
+            if var.POS == 1231234:
+                self.assertEqual(None, sv_end)
             elif var.POS == 1234567:
                 self.assertEqual(None, sv_end)
 
@@ -885,6 +911,8 @@ def test_qual(self):
                 expected = 1e+03
             if var.POS == 1230237:
                 expected = 47
+            if var.POS == 1231234:
+                expected = 46
             elif var.POS == 1234567:
                 expected = None
             self.assertEqual(expected, qual)
@@ -1166,6 +1194,8 @@ def test_phased(self):
                 self.assertEqual([True, True, False], phases)
             if var.POS == 1230237:
                 self.assertEqual([True, True, False], phases)
+            if var.POS == 1231234:
+                self.assertEqual([True, True, True], phases)
             elif var.POS == 1234567:
                 self.assertEqual([False, False, False], phases)
 
@@ -1181,6 +1211,8 @@ def test_gt_bases(self):
                 self.assertEqual(['G|T', 'T|G', 'T/T'], gt_bases)
             elif var.POS == 1230237:
                 self.assertEqual(['T|T', 'T|T', 'T/T'], gt_bases)
+            elif var.POS == 1231234:
+                self.assertEqual(['A|A', 'AT|AT', 'AT|AT'], gt_bases)
             elif var.POS == 1234567:
                 self.assertEqual([None, 'GTCT/GTACT', 'G/G'], gt_bases)
 
@@ -1198,6 +1230,8 @@ def test_gt_types(self):
                 self.assertEqual([1,1,2], gt_types)
             elif var.POS == 1230237:
                 self.assertEqual([0,0,0], gt_types)
+            elif var.POS == 1231234:
+                self.assertEqual([2,0,0], gt_types)
             elif var.POS == 1234567:
                 self.assertEqual([None,1,2], gt_types)
 
@@ -1235,20 +1269,20 @@ def testFetchRange(self):
 
         fetched_variants = self.reader.fetch('20', 1110695, 1234567)
         self.assertFetchedExpectedPositions(
-                fetched_variants, [1110696, 1230237, 1234567])
+                fetched_variants, [1110696, 1230237, 1231234, 1234567])
 
 
     def testFetchesFromStartIfStartOnlySpecified(self):
         fetched_variants = self.reader.fetch('20', 1110695)
         self.assertFetchedExpectedPositions(
-                fetched_variants, [1110696, 1230237, 1234567])
+                fetched_variants, [1110696, 1230237, 1231234, 1234567])
 
 
     def testFetchesAllFromChromIfOnlyChromSpecified(self):
         fetched_variants = self.reader.fetch('20')
         self.assertFetchedExpectedPositions(
                 fetched_variants,
-                [14370, 17330, 1110696, 1230237, 1234567]
+                [14370, 17330, 1110696, 1230237, 1231234, 1234567]
         )
 
 
@@ -1517,10 +1551,10 @@ def test_walk(self):
             self.assertEqual(x[0], x[1])
             self.assertEqual(x[1], x[2])
             n+= 1
-        self.assertEqual(n, 5)
+        self.assertEqual(n, 6)
 
-        # artificial case 2 from the left, 2 from the right, 2 together, 1 from the right, 1 from the left
-        expected = 'llrrttrl'
+        # artificial case 2 from the left, 2 from the right, 3 together, 1 from the right, 1 from the left
+        expected = 'llrrtttrl'
         reader1 = vcf.Reader(fh('walk_left.vcf'))
         reader2 = vcf.Reader(fh('example-4.0.vcf'))
 
diff --git a/vcf/test/walk_left.vcf b/vcf/test/walk_left.vcf
index c910432..aafb82b 100644
--- a/vcf/test/walk_left.vcf
+++ b/vcf/test/walk_left.vcf
@@ -21,4 +21,5 @@
 19	17330	.	T	A	3	q10	NS=3;DP=11;AF=0.017	GT:GQ:DP:HQ	0|0:49:3:58,50	0|1:3:5:65,3	0/0:41:3:65,3
 20	1110696	rs6040355	A	G,T	67	PASS	NS=2;DP=10;AF=0.333,0.667;AA=T;DB	GT:GQ:DP:HQ	1|2:21:6:23,27	2|1:2:0:18,2	2/2:35:4:65,4
 20	1230237	.	T	.	47	PASS	NS=3;DP=13;AA=T	GT:GQ:DP:HQ	0|0:54:7:56,60	0|0:48:4:51,51	0/0:61:2:65,3
+20	1231234	.	AT	A	46	PASS	NS=3;DP=15;AA=A	GT:GQ:DP:HQ	1|1:23:7:26,30	0|0:27:9:56,60	0|0:31:10:65,71
 21	1234567	microsat1	GTCT	G,GTACT	50	PASS	NS=3;DP=9;AA=G	GT:GQ:DP	./.:35:4	0/2:17:2	1/1:40:3

From 8b54f4e672733a32a019e8509864ff24a3bd6b8b Mon Sep 17 00:00:00 2001
From: Martijn Vermaat <martijn@vermaat.name>
Date: Thu, 26 Jan 2017 20:57:09 +0100
Subject: [PATCH 158/168] Undo some collateral damage to tests

---
 vcf/test/test_vcf.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index a47f4fa..ec69920 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -1269,20 +1269,20 @@ def testFetchRange(self):
 
         fetched_variants = self.reader.fetch('20', 1110695, 1234567)
         self.assertFetchedExpectedPositions(
-                fetched_variants, [1110696, 1230237, 1231234, 1234567])
+                fetched_variants, [1110696, 1230237, 1234567])
 
 
     def testFetchesFromStartIfStartOnlySpecified(self):
         fetched_variants = self.reader.fetch('20', 1110695)
         self.assertFetchedExpectedPositions(
-                fetched_variants, [1110696, 1230237, 1231234, 1234567])
+                fetched_variants, [1110696, 1230237, 1234567])
 
 
     def testFetchesAllFromChromIfOnlyChromSpecified(self):
         fetched_variants = self.reader.fetch('20')
         self.assertFetchedExpectedPositions(
                 fetched_variants,
-                [14370, 17330, 1110696, 1230237, 1231234, 1234567]
+                [14370, 17330, 1110696, 1230237, 1234567]
         )
 
 
@@ -1351,7 +1351,7 @@ def test_FT_one_two(self):
         ]
         result=[call.data.FT for call in r.samples]
         self.assertEqual(target,result)
-            
+
 
 class TestIsFiltered(unittest.TestCase):
     """ Test is_filtered property for _Call and _Record """

From 7bf793f9508b37549d466efe0686fa6116873cc6 Mon Sep 17 00:00:00 2001
From: Sam Brightman <sam.brightman@gmail.com>
Date: Thu, 26 Jan 2017 20:11:37 +0100
Subject: [PATCH 159/168] Always use a list for list-type fields

Singleton lists - e.g. Number=A with a single allele - are now parsed
into lists instead of being treated as single values. This is more
consistent with the meaning of the field definition and thus easier
for client code.

Fixes #254.
---
 vcf/cparse.pyx         |  8 +-------
 vcf/parser.py          |  8 +-------
 vcf/test/issue-254.vcf |  9 +++++++++
 vcf/test/test_vcf.py   | 12 ++++++++++++
 4 files changed, 23 insertions(+), 14 deletions(-)
 create mode 100644 vcf/test/issue-254.vcf

diff --git a/vcf/cparse.pyx b/vcf/cparse.pyx
index d6a93ae..87f806d 100644
--- a/vcf/cparse.pyx
+++ b/vcf/cparse.pyx
@@ -65,8 +65,7 @@ def parse_samples(
             entry_num = samp_fmt_nums[j]
 
             # we don't need to split single entries
-            if entry_num == 1 or ',' not in vals:
-
+            if entry_num == 1:
                 if entry_type == INTEGER:
                     try:
                         sampdat[j] = int(vals)
@@ -76,14 +75,9 @@ def parse_samples(
                     sampdat[j] = float(vals)
                 else:
                     sampdat[j] = vals
-
-                if entry_num != 1:
-                    sampdat[j] = (sampdat[j])
-
                 continue
 
             vals = vals.split(',')
-
             if entry_type == INTEGER:
                 try:
                     sampdat[j] = _map(int, vals)
diff --git a/vcf/parser.py b/vcf/parser.py
index 00a7666..bb7c90c 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -491,8 +491,7 @@ def _parse_samples(self, samples, samp_fmt, site):
                 entry_type = samp_fmt._types[i]
 
                 # we don't need to split single entries
-                if entry_num == 1 or ',' not in vals:
-
+                if entry_num == 1:
                     if entry_type == 'Integer':
                         try:
                             sampdat[i] = int(vals)
@@ -502,14 +501,9 @@ def _parse_samples(self, samples, samp_fmt, site):
                         sampdat[i] = float(vals)
                     else:
                         sampdat[i] = vals
-
-                    if entry_num != 1:
-                        sampdat[i] = (sampdat[i])
-
                     continue
 
                 vals = vals.split(',')
-
                 if entry_type == 'Integer':
                     try:
                         sampdat[i] = _map(int, vals)
diff --git a/vcf/test/issue-254.vcf b/vcf/test/issue-254.vcf
new file mode 100644
index 0000000..c17262d
--- /dev/null
+++ b/vcf/test/issue-254.vcf
@@ -0,0 +1,9 @@
+##fileformat=VCFv4.1
+##fileDate=20090805
+##source=myImputationProgramV3.1
+##reference=1000GenomesPilot-NCBI36
+##phasing=partial
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observation count">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NA00001	NA00002	NA00003
+21	4242421	.	T	A	30	.	.	GT:AO	0|0:0.1	0|1:0.2	0/0:0.3
diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index ec69920..a09b0b9 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -1353,6 +1353,17 @@ def test_FT_one_two(self):
         self.assertEqual(target,result)
 
 
+class TestIssue254(unittest.TestCase):
+    """ See https://github.com/jamescasbon/PyVCF/issues/254 """
+
+    def test_remains_singleton_list(self):
+        reader = vcf.Reader(fh('issue-254.vcf'))
+        record = next(reader)
+        expected = [[0.1], [0.2], [0.3]]
+        actual = [call.data.AO for call in record.samples]
+        self.assertEqual(expected, actual)
+
+
 class TestIsFiltered(unittest.TestCase):
     """ Test is_filtered property for _Call and _Record """
 
@@ -1703,6 +1714,7 @@ def test_strelka(self):
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestIssue201))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestIssue234))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestIssue246))
+suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestIssue254))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestIsFiltered))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestOpenMethods))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestSampleFilter))

From 1fc5ca3b467e5c45a49a711d06b2f5308e0b835f Mon Sep 17 00:00:00 2001
From: Sam Brightman <sam.brightman@gmail.com>
Date: Thu, 26 Jan 2017 20:22:19 +0100
Subject: [PATCH 160/168] Unify code paths' treatment of "Numeric" type

---
 vcf/parser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vcf/parser.py b/vcf/parser.py
index bb7c90c..be640f4 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -497,7 +497,7 @@ def _parse_samples(self, samples, samp_fmt, site):
                             sampdat[i] = int(vals)
                         except ValueError:
                             sampdat[i] = float(vals)
-                    elif entry_type == 'Float':
+                    elif entry_type == 'Float' or entry_type == 'Numeric':
                         sampdat[i] = float(vals)
                     else:
                         sampdat[i] = vals

From ef406459cccbf63976b937b0a097f2a03dcd19e1 Mon Sep 17 00:00:00 2001
From: Sam Brightman <sam.brightman@gmail.com>
Date: Sat, 28 Jan 2017 13:01:25 +0100
Subject: [PATCH 161/168] Unify tested versions, including Python 3.5/3.6 and
 PyPy

---
 .travis.yml | 1 +
 setup.py    | 4 ++++
 tox.ini     | 6 +++++-
 3 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 658f857..b346129 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -11,6 +11,7 @@ python:
   - "3.3"
   - "3.4"
   - "3.5"
+  - "3.6"
   - "nightly"
   - "pypy"
   - "pypy3"
diff --git a/setup.py b/setup.py
index d8089c0..a6e0595 100644
--- a/setup.py
+++ b/setup.py
@@ -74,6 +74,10 @@
         'Programming Language :: Python :: 3.2',
         'Programming Language :: Python :: 3.3',
         'Programming Language :: Python :: 3.4',
+        'Programming Language :: Python :: 3.5',
+        'Programming Language :: Python :: 3.6',
+        'Programming Language :: Python :: Implementation :: CPython',
+        'Programming Language :: Python :: Implementation :: PyPy',
         'Topic :: Scientific/Engineering :: Bio-Informatics',
       ],
     keywords='bioinformatics',
diff --git a/tox.ini b/tox.ini
index 64a7ab4..394251d 100644
--- a/tox.ini
+++ b/tox.ini
@@ -4,7 +4,7 @@
 # and then run "tox" from this directory.
 
 [tox]
-envlist = py26, py27, py32, py33, py34
+envlist = py26, py27, py32, py33, py34, py35, py36, pypy, pypy3
 
 [testenv]
 deps =
@@ -20,3 +20,7 @@ deps =
 [testenv:pypy]
 deps =
     -rrequirements/pypy-requirements.txt
+
+[testenv:pypy3]
+deps =
+    -rrequirements/pypy-requirements.txt

From 60ae36f93ffc82bcaad0ab08f5f07d7ebdfc1201 Mon Sep 17 00:00:00 2001
From: Sam Brightman <sam.brightman@gmail.com>
Date: Sat, 28 Jan 2017 13:02:09 +0100
Subject: [PATCH 162/168] Fix Tox warning by using clean command instead of rm

---
 tox.ini | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tox.ini b/tox.ini
index 394251d..fb00aa3 100644
--- a/tox.ini
+++ b/tox.ini
@@ -10,8 +10,7 @@ envlist = py26, py27, py32, py33, py34, py35, py36, pypy, pypy3
 deps =
     -rrequirements/common-requirements.txt
 commands =
-    rm -rf {toxinidir}/build
-    python setup.py test
+    python setup.py clean --all test
 
 [testenv:py26]
 deps =

From cc005deb32c64b6c7648b39d3e2517c68a215a48 Mon Sep 17 00:00:00 2001
From: Sam Brightman <sam.brightman@gmail.com>
Date: Sat, 28 Jan 2017 13:04:49 +0100
Subject: [PATCH 163/168] Drop Python 2.6, since PySAM needs sysconfig

---
 .travis.yml                             | 3 +--
 requirements/python2.6-requirements.txt | 5 -----
 setup.py                                | 8 --------
 tox.ini                                 | 6 +-----
 4 files changed, 2 insertions(+), 20 deletions(-)
 delete mode 100644 requirements/python2.6-requirements.txt

diff --git a/.travis.yml b/.travis.yml
index b346129..221bd18 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -5,7 +5,6 @@ cache:
   directories:
     - $HOME/.cache/pip
 python:
-  - "2.6"
   - "2.7"
   - "3.2"
   - "3.3"
@@ -16,6 +15,6 @@ python:
   - "pypy"
   - "pypy3"
 install:
-  - "if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install -r requirements/python2.6-requirements.txt; elif [[ $TRAVIS_PYTHON_VERSION == 'pypy' ]] || [[ $TRAVIS_PYTHON_VERSION == 'pypy3' ]]; then pip install -r requirements/pypy-requirements.txt; else pip install -r requirements/common-requirements.txt; fi"
+  - if [[ "$TRAVIS_PYTHON_VERSION" =~ ^pypy ]]; then pip install -r requirements/pypy-requirements.txt; else pip install -r requirements/common-requirements.txt; fi
   - python setup.py install
 script: python setup.py test
diff --git a/requirements/python2.6-requirements.txt b/requirements/python2.6-requirements.txt
deleted file mode 100644
index 27c9bc2..0000000
--- a/requirements/python2.6-requirements.txt
+++ /dev/null
@@ -1,5 +0,0 @@
--r common-requirements.txt
-argparse
-counter
-ordereddict
-unittest2
diff --git a/setup.py b/setup.py
index a6e0595..0bfd710 100644
--- a/setup.py
+++ b/setup.py
@@ -8,15 +8,8 @@
 except:
     CYTHON = False
 
-IS_PYTHON26 = sys.version_info[:2] == (2, 6)
-
 DEPENDENCIES = ['setuptools']
 
-if IS_PYTHON26:
-    DEPENDENCIES.extend(['argparse', 'counter', 'ordereddict',
-                         'unittest2'])
-
-
 # get the version without an import
 VERSION = "Undefined"
 DOC = ""
@@ -68,7 +61,6 @@
         'Programming Language :: Cython',
         'Programming Language :: Python',
         'Programming Language :: Python :: 2',
-        'Programming Language :: Python :: 2.6',
         'Programming Language :: Python :: 2.7',
         'Programming Language :: Python :: 3',
         'Programming Language :: Python :: 3.2',
diff --git a/tox.ini b/tox.ini
index fb00aa3..d6a9c09 100644
--- a/tox.ini
+++ b/tox.ini
@@ -4,7 +4,7 @@
 # and then run "tox" from this directory.
 
 [tox]
-envlist = py26, py27, py32, py33, py34, py35, py36, pypy, pypy3
+envlist = py27, py32, py33, py34, py35, py36, pypy, pypy3
 
 [testenv]
 deps =
@@ -12,10 +12,6 @@ deps =
 commands =
     python setup.py clean --all test
 
-[testenv:py26]
-deps =
-    -rrequirements/python2.6-requirements.txt
-
 [testenv:pypy]
 deps =
     -rrequirements/pypy-requirements.txt

From d8839579d90c203425097bafb04a4f8fba747307 Mon Sep 17 00:00:00 2001
From: Sam Brightman <sam.brightman@gmail.com>
Date: Sat, 28 Jan 2017 16:59:11 +0100
Subject: [PATCH 164/168] Drop 3.2/3.3, since PySAM does not build with them

---
 .travis.yml | 2 --
 setup.py    | 2 --
 tox.ini     | 2 +-
 3 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 221bd18..ad315b2 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -6,8 +6,6 @@ cache:
     - $HOME/.cache/pip
 python:
   - "2.7"
-  - "3.2"
-  - "3.3"
   - "3.4"
   - "3.5"
   - "3.6"
diff --git a/setup.py b/setup.py
index 0bfd710..b865b8d 100644
--- a/setup.py
+++ b/setup.py
@@ -63,8 +63,6 @@
         'Programming Language :: Python :: 2',
         'Programming Language :: Python :: 2.7',
         'Programming Language :: Python :: 3',
-        'Programming Language :: Python :: 3.2',
-        'Programming Language :: Python :: 3.3',
         'Programming Language :: Python :: 3.4',
         'Programming Language :: Python :: 3.5',
         'Programming Language :: Python :: 3.6',
diff --git a/tox.ini b/tox.ini
index d6a9c09..af7049e 100644
--- a/tox.ini
+++ b/tox.ini
@@ -4,7 +4,7 @@
 # and then run "tox" from this directory.
 
 [tox]
-envlist = py27, py32, py33, py34, py35, py36, pypy, pypy3
+envlist = py27, py34, py35, py36, pypy, pypy3
 
 [testenv]
 deps =

From a9b1731ac555b32e0f146be46789c3c8668a4903 Mon Sep 17 00:00:00 2001
From: Eric <ericfeldman93@gmail.com>
Date: Wed, 1 Feb 2017 14:32:33 +0200
Subject: [PATCH 165/168] handle empty string as none

---
 vcf/parser.py                   |  4 ++--
 vcf/test/bad-info-character.vcf |  8 ++++++++
 vcf/test/test_vcf.py            | 10 ++++++++++
 3 files changed, 20 insertions(+), 2 deletions(-)
 create mode 100644 vcf/test/bad-info-character.vcf

diff --git a/vcf/parser.py b/vcf/parser.py
index be640f4..c3c3d08 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -354,9 +354,9 @@ def _parse_metainfo(self):
         self.samples = fields[9:]
         self._sample_indexes = dict([(x,i) for (i,x) in enumerate(self.samples)])
 
-    def _map(self, func, iterable, bad='.'):
+    def _map(self, func, iterable, bad=['.', '']):
         '''``map``, but make bad values None.'''
-        return [func(x) if x != bad else None
+        return [func(x) if x not in bad else None
                 for x in iterable]
 
     def _parse_filter(self, filt_str):
diff --git a/vcf/test/bad-info-character.vcf b/vcf/test/bad-info-character.vcf
new file mode 100644
index 0000000..8b23ae4
--- /dev/null
+++ b/vcf/test/bad-info-character.vcf
@@ -0,0 +1,8 @@
+##fileformat=VCFv4.1
+##INFO=<ID=FLOAT_1,Number=1,Type=Float,Description="A floating point value">
+##INFO=<ID=CHAR_1,Number=1,Type=Character,Description="A character value">
+##INFO=<ID=FLOAT_N,Number=.,Type=Float,Description="Floating point values">
+##INFO=<ID=CHAR_N,Number=.,Type=Character,Description="Character values">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	Sample
+chr1	100	id1	G	A	.	.	EMPTY=;DOT=.;NOTEMPTY=6	GT	0/1
diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index a09b0b9..9dca0bd 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -393,6 +393,16 @@ def test_write(self):
             self.assertEquals(l.INFO, r.INFO)
 
 
+class TestInfoBadInfoFields(unittest.TestCase):
+    def test_parse(self):
+        reader = vcf.Reader(fh('bad-info-character.vcf'))
+        record = next(reader)
+        self.assertEquals(record.INFO['DOT'], [None])
+        self.assertEquals(record.INFO['EMPTY'], [None])
+        self.assertEquals(record.INFO['NOTEMPTY'], ['6'])
+        pass
+
+
 class TestParseMetaLine(unittest.TestCase):
     def test_parse(self):
         reader = vcf.Reader(fh('parse-meta-line.vcf'))

From 3b76ada9beda23553456919e8eb9cd34dcb36623 Mon Sep 17 00:00:00 2001
From: Eric <ericfeldman93@gmail.com>
Date: Thu, 2 Feb 2017 11:34:43 +0200
Subject: [PATCH 166/168] CR comments

---
 vcf/cparse.pyx                  |  4 ++--
 vcf/test/bad-info-character.vcf | 14 +++++++++-----
 vcf/test/test_vcf.py            | 13 +++++++++----
 3 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/vcf/cparse.pyx b/vcf/cparse.pyx
index 87f806d..334542a 100644
--- a/vcf/cparse.pyx
+++ b/vcf/cparse.pyx
@@ -1,8 +1,8 @@
 from model import _Call
 
-cdef _map(func, iterable, bad='.'):
+cdef _map(func, iterable, bad=['.', '']):
     '''``map``, but make bad values None.'''
-    return [func(x) if x != bad else None
+    return [func(x) if x not in bad else None
             for x in iterable]
 
 INTEGER = 'Integer'
diff --git a/vcf/test/bad-info-character.vcf b/vcf/test/bad-info-character.vcf
index 8b23ae4..93b87e1 100644
--- a/vcf/test/bad-info-character.vcf
+++ b/vcf/test/bad-info-character.vcf
@@ -1,8 +1,12 @@
 ##fileformat=VCFv4.1
-##INFO=<ID=FLOAT_1,Number=1,Type=Float,Description="A floating point value">
-##INFO=<ID=CHAR_1,Number=1,Type=Character,Description="A character value">
-##INFO=<ID=FLOAT_N,Number=.,Type=Float,Description="Floating point values">
-##INFO=<ID=CHAR_N,Number=.,Type=Character,Description="Character values">
+##INFO=<ID=EMPTY,Number=1,Type=Float,Description="A floating point value">
+##INFO=<ID=EMPTY_6,Number=1,Type=Float,Description="A floating point value">
+##INFO=<ID=EMPTY_N,Number=1,Type=Float,Description="A floating point value">
+##INFO=<ID=DOT,Number=1,Type=Character,Description="A character value">
+##INFO=<ID=DOT_N,Number=1,Type=Character,Description="A character value">
+##INFO=<ID=DOT_6,Number=1,Type=Character,Description="A character value">
+##INFO=<ID=NOTEMPTY,Number=.,Type=Float,Description="Floating point values">
+##INFO=<ID=FLAG,Number=0,Type=Flag,Description="HapMap2 membership">
 ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
 #CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	Sample
-chr1	100	id1	G	A	.	.	EMPTY=;DOT=.;NOTEMPTY=6	GT	0/1
+chr1	100	id1	G	A	.	.	FLAG;EMPTY=;EMPTY_6=;EMPTY_N=;DOT=.;DOT_6=.;DOT_N=.;NOTEMPTY=6	GT	0/1
diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index 9dca0bd..0d107b2 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -393,13 +393,17 @@ def test_write(self):
             self.assertEquals(l.INFO, r.INFO)
 
 
-class TestInfoBadInfoFields(unittest.TestCase):
+class TestBadInfoFields(unittest.TestCase):
     def test_parse(self):
         reader = vcf.Reader(fh('bad-info-character.vcf'))
         record = next(reader)
-        self.assertEquals(record.INFO['DOT'], [None])
-        self.assertEquals(record.INFO['EMPTY'], [None])
-        self.assertEquals(record.INFO['NOTEMPTY'], ['6'])
+        self.assertEquals(record.INFO['DOT'], None)
+        self.assertEquals(record.INFO['DOT_6'], None)
+        self.assertEquals(record.INFO['DOT_N'], None)
+        self.assertEquals(record.INFO['EMPTY'], None)
+        self.assertEquals(record.INFO['EMPTY_6'], None)
+        self.assertEquals(record.INFO['EMPTY_N'], None)
+        self.assertEquals(record.INFO['NOTEMPTY'], [6])
         pass
 
 
@@ -1734,3 +1738,4 @@ def test_strelka(self):
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestGATKMeta))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestUncalledGenotypes))
 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestStrelka))
+suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestBadInfoFields))

From a2f4a4405e79b22bbe85ce987c33a56e0fbf56cc Mon Sep 17 00:00:00 2001
From: Martijn Vermaat <martijn@vermaat.name>
Date: Mon, 6 Feb 2017 00:10:24 +0100
Subject: [PATCH 167/168] More testing for issue 264

---
 vcf/test/bad-info-character.vcf | 18 ++++++++++--------
 vcf/test/test_vcf.py            | 18 +++++++++++-------
 2 files changed, 21 insertions(+), 15 deletions(-)

diff --git a/vcf/test/bad-info-character.vcf b/vcf/test/bad-info-character.vcf
index 93b87e1..099470c 100644
--- a/vcf/test/bad-info-character.vcf
+++ b/vcf/test/bad-info-character.vcf
@@ -1,12 +1,14 @@
 ##fileformat=VCFv4.1
-##INFO=<ID=EMPTY,Number=1,Type=Float,Description="A floating point value">
-##INFO=<ID=EMPTY_6,Number=1,Type=Float,Description="A floating point value">
-##INFO=<ID=EMPTY_N,Number=1,Type=Float,Description="A floating point value">
-##INFO=<ID=DOT,Number=1,Type=Character,Description="A character value">
-##INFO=<ID=DOT_N,Number=1,Type=Character,Description="A character value">
-##INFO=<ID=DOT_6,Number=1,Type=Character,Description="A character value">
-##INFO=<ID=NOTEMPTY,Number=.,Type=Float,Description="Floating point values">
+##INFO=<ID=EMPTY_1,Number=1,Type=Float,Description="A floating point value">
+##INFO=<ID=EMPTY_3,Number=3,Type=Float,Description="Floating point values">
+##INFO=<ID=EMPTY_N,Number=.,Type=Float,Description="Floating point values">
+##INFO=<ID=DOT_1,Number=1,Type=Character,Description="A character value">
+##INFO=<ID=DOT_3,Number=3,Type=Character,Description="Character values">
+##INFO=<ID=DOT_N,Number=.,Type=Character,Description="Character values">
+##INFO=<ID=NOTEMPTY_1,Number=1,Type=Float,Description="A floating point value">
+##INFO=<ID=NOTEMPTY_3,Number=3,Type=Float,Description="Floating point values">
+##INFO=<ID=NOTEMPTY_N,Number=.,Type=Float,Description="Floating point values">
 ##INFO=<ID=FLAG,Number=0,Type=Flag,Description="HapMap2 membership">
 ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
 #CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	Sample
-chr1	100	id1	G	A	.	.	FLAG;EMPTY=;EMPTY_6=;EMPTY_N=;DOT=.;DOT_6=.;DOT_N=.;NOTEMPTY=6	GT	0/1
+chr1	100	id1	G	A	.	.	FLAG;EMPTY_1=;EMPTY_3=;EMPTY_N=;DOT_1=.;DOT_3=.,.,.;DOT_N=.;NOTEMPTY_1=1;NOTEMPTY_3=1,2,3;NOTEMPTY_N=1	GT	0/1
diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index 0d107b2..b2e3121 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -397,13 +397,17 @@ class TestBadInfoFields(unittest.TestCase):
     def test_parse(self):
         reader = vcf.Reader(fh('bad-info-character.vcf'))
         record = next(reader)
-        self.assertEquals(record.INFO['DOT'], None)
-        self.assertEquals(record.INFO['DOT_6'], None)
-        self.assertEquals(record.INFO['DOT_N'], None)
-        self.assertEquals(record.INFO['EMPTY'], None)
-        self.assertEquals(record.INFO['EMPTY_6'], None)
-        self.assertEquals(record.INFO['EMPTY_N'], None)
-        self.assertEquals(record.INFO['NOTEMPTY'], [6])
+        self.assertEquals(record.INFO['DOT_1'], None)
+        self.assertEquals(record.INFO['DOT_3'], [None, None, None])
+        self.assertEquals(record.INFO['DOT_N'], [None])
+        self.assertEquals(record.INFO['EMPTY_1'], None)
+        # Perhaps EMPTY_3 should yield [None, None, None] but this is really a
+        # corner case of unspecified behaviour.
+        self.assertEquals(record.INFO['EMPTY_3'], [None])
+        self.assertEquals(record.INFO['EMPTY_N'], [None])
+        self.assertEquals(record.INFO['NOTEMPTY_1'], 1)
+        self.assertEquals(record.INFO['NOTEMPTY_3'], [1, 2, 3])
+        self.assertEquals(record.INFO['NOTEMPTY_N'], [1])
         pass
 
 

From d91ec5ed4e85fc34dc6942eca70dd869d75d1931 Mon Sep 17 00:00:00 2001
From: Sam Brightman <sam.brightman@gmail.com>
Date: Tue, 14 Feb 2017 16:32:52 +0100
Subject: [PATCH 168/168] Allow partially-called genotypes to be considered
 called

---
 vcf/model.py                    |  4 ++--
 vcf/test/test_vcf.py            | 10 ++++++++++
 vcf/test/uncalled_genotypes.vcf |  1 +
 3 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/vcf/model.py b/vcf/model.py
index 375a3f8..34a4d17 100644
--- a/vcf/model.py
+++ b/vcf/model.py
@@ -26,7 +26,7 @@ def __init__(self, site, sample, data):
         if getattr(self.data, 'GT', None) is not None:
             self.gt_alleles = [(al if al != '.' else None) for al in allele_delimiter.split(self.data.GT)]
             self.ploidity = len(self.gt_alleles)
-            self.called = all([al != None for al in self.gt_alleles])
+            self.called = any(al is not None for al in self.gt_alleles)
             self.gt_nums = self.data.GT if self.called else None
         else:
             #62 a call without a genotype is not defined as called or not
@@ -65,7 +65,7 @@ def gt_bases(self):
         if self.called:
             # lookup and return the actual DNA alleles
             try:
-                return self.gt_phase_char().join(str(self.site.alleles[int(X)]) for X in self.gt_alleles)
+                return self.gt_phase_char().join(str(self.site.alleles[int(X)] if X is not None else '.') for X in self.gt_alleles)
             except:
                 sys.stderr.write("Allele number not found in list of alleles\n")
         else:
diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
index a09b0b9..deeff01 100644
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -1639,22 +1639,32 @@ def test_read_uncalled(self):
             gt_nums = [s.gt_nums for s in var.samples]
             ploidity = [s.ploidity for s in var.samples]
             gt_alleles = [s.gt_alleles for s in var.samples]
+            gt_type = [s.gt_type for s in var.samples]
 
             if var.POS == 14370:
                 self.assertEqual(['0|0', None, '1/1'], gt_nums)
                 self.assertEqual(['G|G', None, 'A/A'], gt_bases)
                 self.assertEqual([2,2,2], ploidity)
                 self.assertEqual([['0','0'], [None,None], ['1','1']], gt_alleles)
+                self.assertEqual([0, None, 2], gt_type)
             elif var.POS == 17330:
                 self.assertEqual([None, '0|1', '0/0'], gt_nums)
                 self.assertEqual([None, 'T|A', 'T/T'], gt_bases)
                 self.assertEqual([3,2,2], ploidity)
                 self.assertEqual([[None,None,None], ['0','1'], ['0','0']], gt_alleles)
+                self.assertEqual([None, 1, 0], gt_type)
             elif var.POS == 1234567:
                 self.assertEqual(['0/1', '0/2', None], gt_nums)
                 self.assertEqual(['GTC/G', 'GTC/GTCT', None], gt_bases)
                 self.assertEqual([2,2,1], ploidity)
                 self.assertEqual([['0','1'], ['0','2'], [None]], gt_alleles)
+                self.assertEqual([1, 1, None], gt_type)
+            elif var.POS == 1234568:
+                self.assertEqual(['./1', '0/.', None], gt_nums)
+                self.assertEqual(['./G', 'GTC/.', None], gt_bases)
+                self.assertEqual([2,2,1], ploidity)
+                self.assertEqual([[None,'1'], ['0',None], [None]], gt_alleles)
+                self.assertEqual([1, 1, None], gt_type)
         reader._reader.close()
 
 
diff --git a/vcf/test/uncalled_genotypes.vcf b/vcf/test/uncalled_genotypes.vcf
index 2032097..794aea7 100644
--- a/vcf/test/uncalled_genotypes.vcf
+++ b/vcf/test/uncalled_genotypes.vcf
@@ -5,3 +5,4 @@
 20	14370	rs6054257	G	A	29	PASS	NS=3	GT	0|0	./.	1/1
 20	17330	.	T	A	3	q10	NS=3	GT	././.	0|1	0/0
 20	1234567	microsat1	GTC	G,GTCT	50	PASS	NS=3	GT	0/1	0/2	.
+20	1234568	.	GTC	G,GTCT	50	PASS	NS=3	GT	./1	0/.	.