# Source code for parsers.omni25

import parsers

import os
import os.path
import re

import cnv_struct

# This file is part of CNVAnalysisToolkit.
# 
# CNVAnalysisToolkit is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# 
# CNVAnalysisToolkit is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with CNVAnalysisToolkit.  If not, see <http://www.gnu.org/licenses/>.

__author__ = "Marc-Andre Legault (StatGen)"
__copyright__ = "Copyright (C) 2013 StatGen"
__license__ = "GNU General Public License v3 (GPL-3)"

class Parser(parsers.ParentParser):
    """Build the CNV dictionary of a family from QuantiSNP calls.

    The calls are those produced by the QuantiSNP algorithm on the Omni2.5
    genotyping platform.  For every sample, a single tab-delimited ``.cnv``
    file is read from the ``calls`` sub-directory of the sample's path.

    """

    # Titles of the columns read from the header line of a ``.cnv`` file, in
    # the order in which ``_read_calls`` unpacks them.
    _COLUMNS = (
        "Chromosome",
        "Start Position (bp)",
        "End Position (bp)",
        "Copy Number",
        "Max. Log BF",
    )

    def __init__(self, base_dir):
        super(Parser, self).__init__(base_dir)

    def get_cnvs(self):
        """Parse the call file of every sample and return the CNV dictionary.

        ``self.paths`` and ``self.cnvs`` are not defined in this class; they
        are presumably initialised by ``parsers.ParentParser`` (to confirm).

        :returns: ``self.cnvs``, a ``{sample: {chromosome: [cnv, ...]}}``
                  dictionary where chromosomes are integers.  Every sample
                  has an entry (possibly an empty list) for each of the
                  chromosomes 1 to 22.

        :raises Exception: if the ``calls`` directory of a sample does not
                           contain exactly one ``.cnv`` file.
        :raises ValueError: if a required column is missing from the header
                            of a file that has data rows, or if a field
                            can't be converted to a number.

        """
        for sample in self.paths:
            # Register the sample before parsing so that a sample without
            # any call still gets its (empty) per-chromosome lists below.
            if not self.cnvs.get(sample):
                self.cnvs[sample] = {}
            sample_cnvs = self.cnvs[sample]

            calls_file = self._find_calls_file(sample)
            for chromo, cnv in self._read_calls(calls_file, sample):
                if not sample_cnvs.get(chromo):
                    sample_cnvs[chromo] = []
                sample_cnvs[chromo].append(cnv)

        # Add empty lists for missing chromosomes.
        autosomes = set(range(1, 23))
        for sample_cnvs in self.cnvs.values():
            for chromo in autosomes - set(sample_cnvs):
                sample_cnvs[chromo] = []

        return self.cnvs

    def _find_calls_file(self, sample):
        """Return the path of the unique ``.cnv`` file of a sample."""
        calls_path = os.path.join(self.paths[sample], "calls")
        candidates = [
            name for name in os.listdir(calls_path) if name.endswith(".cnv")
        ]

        if len(candidates) != 1:
            raise Exception(("Can't identify a single call file in the "
                             "'{}' directory.".format(self.paths[sample])))

        return os.path.join(calls_path, candidates[0])

    def _read_calls(self, calls_file, sample):
        """Yield a ``(chromosome, cnv)`` pair for every row of a call file."""
        with open(calls_file) as f:
            header = f.readline().rstrip("\r\n").split("\t")

            # The column positions are resolved once, on the first data row,
            # instead of once per field and per row.  Doing it lazily keeps
            # files without any data row from failing on their header.
            columns = None

            for line in f:
                line = line.rstrip("\r\n")
                if not line:
                    # Blank lines (e.g. at the end of the file) hold no call.
                    continue

                if columns is None:
                    columns = [header.index(name) for name in self._COLUMNS]

                fields = line.split("\t")
                chromo, start, end, cn, qual = [fields[i] for i in columns]

                # NOTE(review): non-numeric chromosome labels (e.g. X or Y)
                # make int() raise a ValueError here.
                chromo = int(chromo)
                start = int(start)
                end = int(end)
                cn = int(cn)
                qual = float(qual)

                # The type stays None for a copy number of exactly 2.
                cnv_type = None
                if cn > 2:
                    cnv_type = "gain"
                elif cn < 2:
                    cnv_type = "loss"

                cnv = cnv_struct.cnv(
                    pos="chr{}:{}-{}".format(chromo, start, end),
                    type=cnv_type,
                    algo="QuantiSNP",
                    confidence=qual,
                    source=sample,
                )

                yield chromo, cnv