Source code for plot_ro

#!/usr/bin/env python2.7

import os
import sys
import argparse

import numpy as np
import matplotlib.pyplot as plt

import parsers
import samples_db
import cnv_struct
import validation

# This file is part of CNVAnalysisToolkit.
# 
# CNVAnalysisToolkit is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# 
# CNVAnalysisToolkit is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with CNVAnalysisToolkit.  If not, see <http://www.gnu.org/licenses/>.

__author__ = "Marc-Andre Legault (StatGen)"
__copyright__ = "Copyright (C) 2013 StatGen"
__license__ = "GNU General Public License v3 (GPL-3)"

[docs]def main(args): """Plot the histogram of reciprocal overlap between both twins. Since the twins have the same genome, an ideal histogram would have a high density in the high reciprocal overlap region (> 90%). This allows fast assessement of the consistency of a given algorithm. """ ros = [] p = validation.get_parser_if_valid(args) cnvs = p.get_cnvs() sample_ids = samples_db.get_sample_ids_for_family(p.family_id) for sample in cnvs: other_sample = None if sample == "twin1": other_sample = "twin2" else: other_sample = "twin1" if sample.startswith("twin"): sample_id = sample_ids[sample] status = sample for chr in cnvs[sample]: for cnv in cnvs[sample][chr]: # If twin mode: check for reciprocal_overlap # the info field will have the reciprocal overlap. ro = cnv_struct.ro_to_best_in_list(cnv, cnvs[other_sample][chr]) ros.append(np.array(ro)) ros = np.array(ros) fig = plt.figure() axes = fig.add_subplot(111) axes.hist( ros, color=("#33B5E5", "#FF4444"), weights=np.zeros_like(ros) + 1.0 / len(ros) ) axes.set_xlabel("Reciprocal Overlap") axes.set_ylabel("Proportion") if args.title != "": axes.set_title(args.title) if args.save is None: plt.show() else: plt.savefig(args.save)
if __name__ == "__main__": parser = argparse.ArgumentParser(description = ("Plots the ROs for the " "twins")) parser = validation.add_pickle_args(parser) parser.add_argument( "--format", type=str, choices=parsers.__all__, help="The file format for the parser." ) parser.add_argument( "--base_dir", "-i", type=str, help=("The base directory for the calls. A directory " "with four subfodlers starting with father, " "mother, twin1 and twin2 are expected.") ) parser.add_argument( "--title", type=str, default="", help="Title for the plot." ) parser.add_argument("--save", type=str, help="Filename of the output graph file.", default=None ) main(parser.parse_args())