Source code for compare_dgv
#!/usr/bin/env python2.7
import os
import sys
import argparse
import pickle
import numpy as np
import matplotlib.pyplot as plt
import parsers
import samples_db
import cnv_struct
import validation
import cnv_db.db as db
# This file is part of CNVAnalysisToolkit.
#
# CNVAnalysisToolkit is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# CNVAnalysisToolkit is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with CNVAnalysisToolkit. If not, see <http://www.gnu.org/licenses/>.
__author__ = "Marc-Andre Legault (StatGen)"
__copyright__ = "Copyright (C) 2013 StatGen"
__license__ = "GNU General Public License v3 (GPL-3)"
[docs]def main(args):
"""Using this utility as a script creates an histogram of the reciprocal
overlap with the database of genomic variants (DGV) from the given dataset.
"""
ros = []
p = validation.get_parser_if_valid(args)
cnvs = p.get_cnvs()
sample_ids = samples_db.get_sample_ids_for_family(p.family_id)
algo = None
for sample in cnvs:
for chr in cnvs[sample]:
for cnv in cnvs[sample][chr]:
if not algo:
algo = cnv.algo
dgv_cnv_list = db.query_dgv_overlap(cnv)
if len(dgv_cnv_list) == 0:
# Didn't find any overlap to DGV.
cnv.dgv_ro = 0
ros.append(0)
else:
ro = cnv_struct.ro_to_best_in_list(cnv, dgv_cnv_list)
cnv.dgv_ro = ro[0]
ros.append(ro[0])
with open("{}_{}_dgv_overlap.pickle".format(p.family_id, algo), "wb") as f:
pickle.dump(cnvs, f)
ros = np.array(ros)
fig = plt.figure()
axes = fig.add_subplot(111)
axes.hist(
ros,
color=("#33B5E5", ),
weights=np.zeros_like(ros) + 1.0 / len(ros)
)
axes.set_xlabel("Overlap to DGV")
axes.set_ylabel("Proportion")
if args.title != "":
axes.set_title(args.title)
plt.show()
if __name__ == "__main__":
parser = argparse.ArgumentParser(description = ("Plots the ROs for the "
"DGV."))
parser = validation.add_pickle_args(parser)
parser.add_argument(
"--format",
type=str,
choices=parsers.__all__,
help="The file format for the parser."
)
parser.add_argument(
"--base_dir", "-i",
type=str,
help=("The base directory for the calls. A directory "
"with four subfodlers starting with father, "
"mother, twin1 and twin2 are expected.")
)
parser.add_argument(
"--title",
type=str,
default="",
help="Title for the plot."
)
main(parser.parse_args())