Source code for plot_ro
#!/usr/bin/env python2.7
import os
import sys
import argparse
import numpy as np
import matplotlib.pyplot as plt
import parsers
import samples_db
import cnv_struct
import validation
# This file is part of CNVAnalysisToolkit.
# 
# CNVAnalysisToolkit is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# 
# CNVAnalysisToolkit is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with CNVAnalysisToolkit.  If not, see <http://www.gnu.org/licenses/>.
__author__ = "Marc-Andre Legault (StatGen)"
__copyright__ = "Copyright (C) 2013 StatGen"
__license__ = "GNU General Public License v3 (GPL-3)"
[docs]def main(args):
    """Plot the histogram of reciprocal overlap between both twins.
    Since the twins have the same genome, an ideal histogram would have a high
    density in the high reciprocal overlap region (> 90%).
    This allows fast assessement of the consistency of a given algorithm.
    """
    ros = []
        
    p = validation.get_parser_if_valid(args)
    cnvs = p.get_cnvs()
    sample_ids = samples_db.get_sample_ids_for_family(p.family_id)
    for sample in cnvs:
        other_sample = None
        if sample == "twin1":
            other_sample = "twin2"
        else:
            other_sample = "twin1"
        if sample.startswith("twin"):
            sample_id = sample_ids[sample]
            status = sample
            for chr in cnvs[sample]:
                for cnv in cnvs[sample][chr]:
                    # If twin mode: check for reciprocal_overlap
                    # the info field will have the reciprocal overlap.
                    ro = cnv_struct.ro_to_best_in_list(cnv, 
                                                       cnvs[other_sample][chr])
                    ros.append(np.array(ro))
    ros = np.array(ros)
    fig = plt.figure()
    axes = fig.add_subplot(111)
    axes.hist(
                ros,
                color=("#33B5E5", "#FF4444"),
                weights=np.zeros_like(ros) + 1.0 / len(ros)
             )
    axes.set_xlabel("Reciprocal Overlap")
    axes.set_ylabel("Proportion")
    if args.title != "":
        axes.set_title(args.title)
    if args.save is None:
        plt.show()
    else:
        plt.savefig(args.save)
 
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description = ("Plots the ROs for the "
                                                    "twins"))
    parser = validation.add_pickle_args(parser)
    parser.add_argument(
                            "--format",
                            type=str,
                            choices=parsers.__all__,
                            help="The file format for the parser."
                       )
    parser.add_argument(
                            "--base_dir", "-i",
                            type=str,
                            help=("The base directory for the calls. A directory "
                                  "with four subfodlers starting with father, "
                                  "mother, twin1 and twin2 are expected.")
                       )
    parser.add_argument(
                            "--title",
                            type=str,
                            default="",
                            help="Title for the plot."
                       )
    parser.add_argument("--save",
                    type=str,
                    help="Filename of the output graph file.",
                    default=None
                   )
    main(parser.parse_args())