#!/usr/bin/env python2.7
# Computes the average CNV size and std.
import sys
import argparse
import numpy as np
import matplotlib.pyplot as plt
import validation
import parsers
import cnv_struct
# This file is part of CNVAnalysisToolkit.
#
# CNVAnalysisToolkit is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# CNVAnalysisToolkit is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with CNVAnalysisToolkit. If not, see <http://www.gnu.org/licenses/>.
__author__ = "Marc-Andre Legault (StatGen)"
__copyright__ = "Copyright (C) 2013 StatGen"
__license__ = "GNU General Public License v3 (GPL-3)"
[docs]def main(args):
"""Plots the depth of coverage as a function of the size for both gains and
losses.
This can be useful in depth of coverage algorithms to insure that calls are
concordant with the depth of coverage ratio and to identify systematic biais
in gains and losses with respect to CNV size.
.. figure:: _static/size_doc_scatter.png
:width: 600px
:alt: Example of a doc size scatter.
**Figure 1.** Example illustrating typical case of depth of coverage based
calls where DOC < 1 implies a deletion and DOC > 1 implies a gain. Here,
we can see a slight biais towards larger CNVs for gains.
Legend: Red is for gains and blue for deletions.
"""
# List of 2D vectors of size, doc.
size_doc_gain = []
size_doc_loss = []
locs = []
source = []
p = validation.get_parser_if_valid(args)
# Get the CNVs
cnvs = p.get_cnvs()
for sample in cnvs:
other = None
if sample == "twin1":
other = "twin2"
elif sample == "twin2":
other = "twin1"
if other:
for chr in cnvs[sample]:
for cnv in cnvs[sample][chr]:
min_ro = min(cnv_struct.ro_to_best_in_list(
cnv,
cnvs[other][chr]
))
father_ro = cnv_struct.ro_to_best_in_list(
cnv,
cnvs["father"][chr]
)
mother_ro = cnv_struct.ro_to_best_in_list(
cnv,
cnvs["mother"][chr]
)
max_min_parent_ro = max([min(father_ro), min(mother_ro)])
store = True
if args.ro_over and args.enforce_parent_ro:
store = (min_ro >= args.ro_over and
max_min_parent_ro >= args.ro_over)
elif args.ro_over:
store = min_ro >= args.ro_over
elif args.enforce_parent_ro and not args.ro_over:
sys.exit(("When the `--enforce_parent_ro` option is "
"used, it is necessary to provide a "
"`--ro_over` threshold for this program "
"to filter out CNVs accordingly."))
if store:
if cnv.type == "gain":
size_doc_gain.append(
np.array([cnv.size / 1000.0, cnv.doc], dtype = float)
)
else:
size_doc_loss.append(
np.array([cnv.size / 1000.0, cnv.doc], dtype = float)
)
locs.append(cnv.get_pos())
source.append(cnv.source)
size_doc_gain = np.array(size_doc_gain, dtype=float)
size_doc_loss = np.array(size_doc_loss, dtype=float)
if args.plotsame:
plot_in_same_plot(size_doc_gain, size_doc_loss, args.save)
else:
plot_in_subplots(size_doc_gain, size_doc_loss, args.save)
[docs]def plot_in_same_plot(gain, loss, save):
"""Plots the gains and losses in a same subplot using colors to distinguish
them. This can be triggered using the --plotsame option.
"""
f = plt.figure()
plt.plot(gain[:, 0], gain[:, 1], 'r.')
plt.plot(loss[:, 0], loss[:, 1], 'b.')
plt.xlabel("Size (kb)")
plt.ylabel("DOC")
plt.axhline(y = 1, color = "#000000")
plt.xlim(0, 300)
if save is None:
plt.show()
else:
plt.savefig(save)
[docs]def plot_in_subplots(gain, loss, save):
"""Plots the gains and losses in separate subplots.
"""
f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)
ax1.plot(
gain[:, 0],
gain[:, 1],
'k.'
)
ax2.plot(
loss[:, 0],
loss[:, 1],
'k.'
)
ax1.set_xlabel("Size (kb)")
ax1.set_ylabel("DOC")
ax1.set_title("gain")
ax1.axhline(y = 1)
ax2.set_xlabel("Size (kb)")
ax2.set_ylabel("DOC")
ax2.set_title("loss")
ax2.axhline(y = 1)
if save is None:
plt.show()
else:
plt.savefig(save)
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Computes the average and std CNV size.")
parser = validation.add_pickle_args(parser)
parser.add_argument("--format",
type=str,
choices=parsers.__all__,
help="The file format for the parser.")
parser.add_argument("--base_dir",
type=str,
help=("The base directory for the calls. A directory with "
"four subfolders starting with father, mother, twin1 "
"and twin2 are expected."))
parser.add_argument("--ro_over",
default=None,
type=float,
help="Overlap threshold for displayed CNVs."
)
parser.add_argument("--enforce_parent_ro",
action="store_true",
help=("Applies the ro_over threshold to the other twin "
"AND the parents.")
)
parser.add_argument("--plotsame",
action="store_true",
help="Plot in one or two windows."
)
parser.add_argument("--save",
type=str,
help="Filename of the output graph file.",
default=None
)
main(parser.parse_args())