#!/usr/bin/env python2.7
# Functions to write bed files.  
import os
import sys
import argparse
import parsers
import samples_db
import cnv_struct
import validation
# This file is part of CNVAnalysisToolkit.
# 
# CNVAnalysisToolkit is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# 
# CNVAnalysisToolkit is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with CNVAnalysisToolkit.  If not, see <http://www.gnu.org/licenses/>.
__author__ = "Marc-Andre Legault (StatGen)"
__copyright__ = "Copyright (C) 2013 StatGen"
__license__ = "GNU General Public License v3 (GPL-3)"
colors = ["204,0,0", "255,68,68", "0,0,0", "153,254,0", "102,153,0"]
[docs]def main(args):
    """If called as a script, this script will take either a 'base_dir', the
    path representing a directory with the calls for every family as described
    in the README, or a pickle (serialized version of the family information).
    Either twins or parents can also be provided, depending on which bed files
    the user wants to generate.
    """
    p = validation.get_parser_if_valid(args)
    cnvs = p.get_cnvs()
    family_id = p.family_id
    if args.base_dir:
        output_dir = os.path.join(args.base_dir, "{}_bed".format(args.who))
        algo = args.format
    else:
        output_dir = os.path.join(
                                    os.path.split(args.pickle)[0],
                                    "{}_bed".format(args.who)
                                 )
        algo = "generic"
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)
    if args.who == "parents":
        write_beds(cnvs, output_dir, family_id, False, algo)
    if args.who == "twins":
        write_beds(cnvs, output_dir, family_id, True, algo)
 
[docs]def write_beds(cnvs, output_dir, family_id, twins, algo):
    """Writes the bed files for a list of CNVs.
    :param cnvs: A family CNV structure.
    :type cnvs: dict
    :param output_dir: Output directory for the bed files.
    :type output_dir: str
    :param family_id: The family id for track identification (``e.g.`` 1443).
    :type family_id: str
    :param twins: Boolean indicating if we want to generate the beds for the 
                  twins or for the parents.
    :type twins: bool
    :param algo: The algorithm used to generate the CNV calls (for track
                 identification).
    :type algo: str
    """
    header_template = ("track name=\"{algo}_{family_id}_{sample_id}\" "
                       "description=\"{algo}_{family_id}_{sample_id} ({status})\" "
                       "visibility=2 itemRgb=\"On\"\n")
    line_template = "\t".join(["chr{chr}",
                                "{start}",
                                "{end}",
                                "{info}",
                                "0", "+",
                                "{start}",
                                "{end}",
                                "{color}\n"])
    sample_ids = samples_db.get_sample_ids_for_family(family_id)
    if twins:
        sample_filter = ("twin1", "twin2")
    else:
        sample_filter = ("mother", "father")
    for sample in cnvs:
        other_sample = None
        if twins:
            if sample == "twin1":
                other_sample = "twin2"
            else:
                other_sample = "twin1"
        if sample in sample_filter:
            output_file = os.path.join(output_dir, "{}.bed".format(sample))
            f = open(output_file, "w")
            sample_id = sample_ids[sample]
            status = sample
            # Print the header
            f.write(header_template.format(**locals()))
            for chr in cnvs[sample]:
                for cnv in cnvs[sample][chr]:
                    # If twin mode: check for reciprocal_overlap
                    # the info field will have the reciprocal overlap.
                    if twins:
                        ro = cnv_struct.ro_to_best_in_list(cnv, 
                                                           cnvs[other_sample][chr])
                    start = cnv.start
                    end = cnv.end
                    cn = cnv.cn
                    if cn < 2:
                        col_idx = 0
                    elif cn == 2:
                        col_idx = 1
                    else:
                        col_idx = 2
                    color = colors[col_idx]
                    if twins:
                        info = "CN={},RO={};{}".format(cn, *ro)
                    else:
                        info = "CN={}".format(cn)
                    f.write(line_template.format(**locals()))
            f.close()
 
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description = ("Generates desired bam files "
                                                     "can be called as an "
                                                     "autonomous script or "
                                                     "imported as a module."))
    parser = validation.add_pickle_args(parser)
    parser.add_argument(
                            "who",
                            type=str,
                            choices=["parents", "twins"],
                            help=("Write the bed files for either the parents "
                                  "or the twins.")
                       )
    parser.add_argument(
                            "--format",
                            type=str,
                            choices=parsers.__all__,
                            help="The file format for the parser."
                       )
    parser.add_argument(
                            "--base_dir", "-i",
                            type=str,
                            help=("The base directory for the calls. A directory "
                                  "with four subfodlers starting with father, "
                                  "mother, twin1 and twin2 are expected.")
                       )
    main(parser.parse_args())