Source code for plot_cnv_size_dgv_ro

#!/usr/bin/env python2.7

import pickle
import argparse

import mendelian
import parsers
import cnv_db.db
import cnv_struct

import matplotlib.pyplot as plt
import numpy as np

# This file is part of CNVAnalysisToolkit.
# 
# CNVAnalysisToolkit is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# 
# CNVAnalysisToolkit is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with CNVAnalysisToolkit.  If not, see <http://www.gnu.org/licenses/>.

__author__ = "Marc-Andre Legault (StatGen)"
__copyright__ = "Copyright (C) 2013 StatGen"
__license__ = "GNU General Public License v3 (GPL-3)"

[docs]def main(args): """Plots the best reciprocal to a variant in DGV as a function of CNV size. A possible correlation would indicate a biais in the database towards CNVs of a given size. """ with open(args.pickle) as f: cnvs = pickle.load(f) if type(cnvs) is not list: cnvs = parsers.family_to_list(cnvs) for cnv in cnvs: # Manually get the overlap to DGV dgv_cnvs = cnv_db.db.query_dgv_overlap(cnv) best_ro = cnv_struct.ro_to_best_in_list(cnv, dgv_cnvs)[0] cnv.dgv_ro = best_ro # Convert to plottable array. dgv_overlap_size = [] quals = [] for cnv in cnvs: if args.plot_confidence: if cnv.confidence: quals.append(cnv.confidence) dgv_overlap_size.append(np.array([cnv.dgv_ro, cnv.size / 1000.0])) else: dgv_overlap_size.append(np.array([cnv.dgv_ro, cnv.size / 1000.0])) dgv_overlap_size = np.array(dgv_overlap_size) quals = np.array(quals) fig = plt.figure() if len(quals) > 0: plt.scatter( dgv_overlap_size[:, 1], dgv_overlap_size[:, 0], marker = "o", c = quals, cmap = plt.cm.gist_yarg, edgecolors = "none", ) else: plt.scatter(dgv_overlap_size[:, 1], dgv_overlap_size[:, 0], marker = ".") plt.ylabel("Overlap to DGV") plt.xlabel("Size (kilobase)") if args.save is None: plt.show() else: plt.savefig(args.save)
if __name__ == "__main__": PARSER = argparse.ArgumentParser("This script takes a pickle and generates " "the plot of the overlap to DGV with the " "CNV sizes for either a full family or " "the Mendelian CNVs, exlusively.") PARSER.add_argument("--pickle", type=str, help=("The pickle containing either a CNV list or a " "dictionary like structure containing the CNVs " "for the whole family."), required=True, ) PARSER.add_argument("--plot_confidence", action='store_true' ) PARSER.add_argument("--save", type=str, help="Filename of the output graph file.", default=None ) main(PARSER.parse_args())