# Source code for parsers.cnver
# Parser for CNVer output CNV calls.
import re
import os
import cnv_struct
import parsers
# This file is part of CNVAnalysisToolkit.
#
# CNVAnalysisToolkit is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# CNVAnalysisToolkit is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with CNVAnalysisToolkit. If not, see <http://www.gnu.org/licenses/>.
__author__ = "Marc-Andre Legault (StatGen)"
__copyright__ = "Copyright (C) 2013 StatGen"
__license__ = "GNU General Public License v3 (GPL-3)"
class Parser(parsers.ParentParser):
    """Creates the CNV dictionary for a given family for calls from the
    CNVer algorithm.

    Medvedev, P., Fiume, M., Smith, T., Brudno, M. (2010). Detecting copy
    number variation with mated short reads, `Genome Research`,
    **20**:1613-1622.

    NOTE(review): the attributes ``family_root``, ``paths`` and ``cnvs`` read
    by :meth:`get_cnvs` are not defined in this class; they are presumably
    set up by ``parsers.ParentParser.__init__`` -- confirm against the
    parent class.
    """

    def __init__(self, family_root):
        """Delegate initialisation to the parent parser.

        :param family_root: forwarded unchanged to the parent constructor;
                            later used as the first component of every call
                            file path.
        """
        super(Parser, self).__init__(family_root)

    def get_cnvs(self):
        """Read the per-chromosome CNVer call files of every sample.

        For each sample in ``self.paths`` and each chromosome from 1 to 22,
        the file ``<family_root>/<sample path>/calls/chr<N>.cnvs`` is read as
        tab-separated lines with the columns ``chr``, ``start``, ``end``,
        ``cnv_type`` and ``doc``.  One ``cnv_struct.cnv`` object is built per
        line.

        :returns: ``self.cnvs``, filled in as::

                      {"twin1": {1: [cnvs_for_chr1, ...],
                                 2: [cnvs_for_chr2, ...],
                                 ...},
                       "twin2": ...}

        :raises KeyError: if a line has fewer columns than expected.
        :raises ValueError: if the ``doc`` column is not a valid float.

        Before either exception propagates, the offending line and the path
        of the file it came from are printed to standard output.
        """
        # Column names in file order; identical for every file, so built once.
        cols = ["chr", "start", "end", "cnv_type", "doc"]

        for sample in self.paths:
            self.cnvs[sample] = {}

            for i in range(1, 23):
                self.cnvs[sample][i] = []

                chr_file = os.path.join(
                    self.family_root,
                    self.paths[sample],
                    "calls",
                    "chr{}.cnvs".format(i)
                )

                with open(chr_file) as f:
                    for line in f:
                        line = line.rstrip("\r\n")
                        # zip() stops at the shortest input, so a short line
                        # simply yields a dict with missing keys.
                        fields = dict(zip(cols, line.split("\t")))

                        try:
                            # Both statements can fail on a malformed line
                            # (missing column -> KeyError, bad number ->
                            # ValueError); keep them together so the
                            # diagnostic below covers either case.
                            pos = "{chr}:{start}-{end}".format(**fields)
                            doc = float(fields["doc"])
                        except (KeyError, ValueError):
                            print("An error occured while trying to parse "
                                  "the following line: ")
                            print(line)
                            print("")
                            print("From the file:")
                            print(chr_file)
                            print("")
                            # Bare raise keeps the original traceback.
                            raise

                        cnv = cnv_struct.cnv(
                            pos=pos,
                            type=fields["cnv_type"],
                            algo="cnver",
                            doc=doc,
                            source=sample
                        )
                        self.cnvs[sample][i].append(cnv)

        return self.cnvs