Source code for plot_cnv_size_dgv_ro
#!/usr/bin/env python2.7
import pickle
import argparse
import mendelian
import parsers
import cnv_db.db
import cnv_struct
import matplotlib.pyplot as plt
import numpy as np
# This file is part of CNVAnalysisToolkit.
#
# CNVAnalysisToolkit is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# CNVAnalysisToolkit is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with CNVAnalysisToolkit. If not, see <http://www.gnu.org/licenses/>.
__author__ = "Marc-Andre Legault (StatGen)"
__copyright__ = "Copyright (C) 2013 StatGen"
__license__ = "GNU General Public License v3 (GPL-3)"
[docs]def main(args):
"""Plots the best reciprocal to a variant in DGV as a function of CNV size.
A possible correlation would indicate a biais in the database towards CNVs
of a given size.
"""
with open(args.pickle) as f:
cnvs = pickle.load(f)
if type(cnvs) is not list:
cnvs = parsers.family_to_list(cnvs)
for cnv in cnvs:
# Manually get the overlap to DGV
dgv_cnvs = cnv_db.db.query_dgv_overlap(cnv)
best_ro = cnv_struct.ro_to_best_in_list(cnv, dgv_cnvs)[0]
cnv.dgv_ro = best_ro
# Convert to plottable array.
dgv_overlap_size = []
quals = []
for cnv in cnvs:
if args.plot_confidence:
if cnv.confidence:
quals.append(cnv.confidence)
dgv_overlap_size.append(np.array([cnv.dgv_ro, cnv.size / 1000.0]))
else:
dgv_overlap_size.append(np.array([cnv.dgv_ro, cnv.size / 1000.0]))
dgv_overlap_size = np.array(dgv_overlap_size)
quals = np.array(quals)
fig = plt.figure()
if len(quals) > 0:
plt.scatter(
dgv_overlap_size[:, 1],
dgv_overlap_size[:, 0],
marker = "o",
c = quals,
cmap = plt.cm.gist_yarg,
edgecolors = "none",
)
else:
plt.scatter(dgv_overlap_size[:, 1], dgv_overlap_size[:, 0], marker = ".")
plt.ylabel("Overlap to DGV")
plt.xlabel("Size (kilobase)")
if args.save is None:
plt.show()
else:
plt.savefig(args.save)
if __name__ == "__main__":
PARSER = argparse.ArgumentParser("This script takes a pickle and generates "
"the plot of the overlap to DGV with the "
"CNV sizes for either a full family or "
"the Mendelian CNVs, exlusively.")
PARSER.add_argument("--pickle",
type=str,
help=("The pickle containing either a CNV list or a "
"dictionary like structure containing the CNVs "
"for the whole family."),
required=True,
)
PARSER.add_argument("--plot_confidence",
action='store_true'
)
PARSER.add_argument("--save",
type=str,
help="Filename of the output graph file.",
default=None
)
main(PARSER.parse_args())