# Source code for parsers.omni25
import parsers
import os
import os.path
import re
import cnv_struct
# This file is part of CNVAnalysisToolkit.
#
# CNVAnalysisToolkit is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# CNVAnalysisToolkit is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with CNVAnalysisToolkit. If not, see <http://www.gnu.org/licenses/>.
__author__ = "Marc-Andre Legault (StatGen)"
__copyright__ = "Copyright (C) 2013 StatGen"
__license__ = "GNU General Public License v3 (GPL-3)"
class Parser(parsers.ParentParser):
    """Build the per-sample CNV dictionary from QuantiSNP calls (Omni2.5).

    For every sample in ``self.paths`` the parser reads the single ``.cnv``
    file found in the sample's ``calls`` sub-directory and stores one
    ``cnv_struct.cnv`` object per data row in ``self.cnvs``, indexed as
    ``self.cnvs[sample][chromosome]``.

    NOTE(review): ``self.paths`` and ``self.cnvs`` are not defined in this
    class; they are presumably set up by ``parsers.ParentParser`` -- confirm.
    """

    def __init__(self, base_dir):
        """Initialize the parser by delegating to the parent constructor.

        :param base_dir: Forwarded unchanged to ``parsers.ParentParser``.
        """
        super(Parser, self).__init__(base_dir)

    def get_cnvs(self):
        """Parse the calls file of every sample and return the CNV dictionary.

        The calls file is tab-delimited with a header row that must contain
        the columns ``Chromosome``, ``Start Position (bp)``,
        ``End Position (bp)``, ``Copy Number`` and ``Max. Log BF``.

        A copy number above 2 is recorded as a ``"gain"``, below 2 as a
        ``"loss"``; a copy number of exactly 2 leaves the type as ``None``.

        :returns: ``self.cnvs``, mapping each sample to a dictionary of
                  chromosome number (``int``) to list of CNV objects. Every
                  sample gets an entry (possibly an empty list) for each of
                  the chromosomes 1 to 22.

        :raises Exception: If a sample's ``calls`` directory does not hold
                           exactly one ``.cnv`` file.
        :raises ValueError: If a required column is missing from the header
                            or a field can't be converted to a number (this
                            includes non-numeric chromosome labels).
        """
        for sample in self.paths:
            calls_path = os.path.join(self.paths[sample], "calls")
            calls_file = [
                i for i in os.listdir(calls_path) if i.endswith(".cnv")
            ]
            if len(calls_file) != 1:
                raise Exception(("Can't identify a single call file in the "
                                 "'{}' directory.".format(self.paths[sample])))
            calls_file = os.path.join(calls_path, calls_file[0])

            # Register the sample before parsing so that a sample without
            # any call still gets its empty per-chromosome lists below.
            if not self.cnvs.get(sample):
                self.cnvs[sample] = {}
            sample_cnvs = self.cnvs[sample]

            with open(calls_file) as f:
                header = f.readline().rstrip("\r\n").split("\t")

                # Column positions are the same for every row: resolve them
                # once instead of searching the header for each field.
                chromo_col = header.index("Chromosome")
                start_col = header.index("Start Position (bp)")
                end_col = header.index("End Position (bp)")
                cn_col = header.index("Copy Number")
                qual_col = header.index("Max. Log BF")

                for line in f:
                    line = line.rstrip("\r\n")
                    if not line:
                        # Tolerate blank lines (e.g. at the end of the file).
                        continue
                    fields = line.split("\t")

                    chromo = int(fields[chromo_col])
                    start = int(fields[start_col])
                    end = int(fields[end_col])
                    cn = int(fields[cn_col])
                    qual = float(fields[qual_col])

                    pos = "chr{chromo}:{start}-{end}".format(
                        chromo=chromo, start=start, end=end
                    )

                    # Two copies is the reference state: neither gain nor
                    # loss, so the type stays None.
                    cnv_type = None
                    if cn > 2:
                        cnv_type = "gain"
                    elif cn < 2:
                        cnv_type = "loss"

                    cnv = cnv_struct.cnv(
                        pos=pos,
                        type=cnv_type,
                        algo="QuantiSNP",
                        confidence=qual,
                        source=sample,
                    )

                    if not sample_cnvs.get(chromo):
                        sample_cnvs[chromo] = []
                    sample_cnvs[chromo].append(cnv)

        # Add empty lists for missing chromosomes.
        for sample in self.cnvs:
            missing_chr = set(range(1, 23)) - set(self.cnvs[sample])
            for chromo in missing_chr:
                self.cnvs[sample][chromo] = []

        return self.cnvs