# Source code for parsers.cnver

# Parser for CNVer output CNV calls.

import re
import os

import cnv_struct
import parsers

# This file is part of CNVAnalysisToolkit.
# 
# CNVAnalysisToolkit is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# 
# CNVAnalysisToolkit is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with CNVAnalysisToolkit.  If not, see <http://www.gnu.org/licenses/>.

__author__ = "Marc-Andre Legault (StatGen)"
__copyright__ = "Copyright (C) 2013 StatGen"
__license__ = "GNU General Public License v3 (GPL-3)"

class Parser(parsers.ParentParser):
    """Creates the CNV dictionary for a given family for calls from the CNVer
    algorithm.

    Medvedev, P., Fiume, M., Smith, T., Brudno, M. (2010). Detecting copy
    number variation with mated short reads, `Genome Research`,
    **20**:1613-1622.

    NOTE(review): ``self.paths``, ``self.family_root`` and ``self.cnvs`` are
    used below but never assigned in this class; they are presumably set up
    by ``parsers.ParentParser.__init__`` -- confirm against the parent class.

    """

    # Column names, in order, of the tab-separated ``chr<N>.cnvs`` files.
    # Kept at class level so it is not rebuilt for every chromosome.
    _COLUMNS = ("chr", "start", "end", "cnv_type", "doc")

    def __init__(self, family_root):
        super(Parser, self).__init__(family_root)

    def get_cnvs(self):
        """Reads the per-chromosome CNVer call files of every sample.

        For each sample in ``self.paths`` and each chromosome from 1 to 22,
        the file ``<family_root>/<sample path>/calls/chr<N>.cnvs`` is read
        and one ``cnv_struct.cnv`` object is built per non-empty line.

        :returns: ``self.cnvs``, filled in with the following structure:
                  ``{sample: {1: [cnvs_for_chr1, ], 2: [cnvs_for_chr2, ], ...}}``

        :raises ValueError: If the ``doc`` column of a line is not a number.
        :raises KeyError: If a line has too few columns.

        Opening a missing call file propagates the error raised by ``open``.

        """
        for sample in self.paths:
            self.cnvs[sample] = {}
            for i in range(1, 23):
                calls = self.cnvs[sample][i] = []

                chr_file = os.path.join(
                    self.family_root,
                    self.paths[sample],
                    "calls",
                    "chr{}.cnvs".format(i)
                )

                with open(chr_file) as f:
                    for line in f:
                        line = line.rstrip("\r\n")
                        if not line:
                            # An empty line (e.g. at the end of the file)
                            # carries no call; skip it instead of failing
                            # on the missing columns.
                            continue

                        # Extra columns, if any, are dropped by zip.
                        fields = dict(zip(self._COLUMNS, line.split("\t")))
                        pos = "{chr}:{start}-{end}".format(**fields)

                        try:
                            doc = float(fields["doc"])
                        except (KeyError, ValueError):
                            # Single-argument print calls behave the same
                            # under Python 2 and Python 3.
                            print("An error occured while trying to parse "
                                  "the following line: ")
                            print(line)
                            print("")
                            print("From the file:")
                            print(chr_file)
                            print("")
                            # A bare raise keeps the original traceback.
                            raise

                        cnv_builder = {
                            "pos": pos,
                            "type": fields["cnv_type"],
                            "algo": "cnver",
                            "doc": doc,
                            "source": sample
                        }
                        calls.append(cnv_struct.cnv(**cnv_builder))

        return self.cnvs