Source code for bed

#!/usr/bin/env python2.7

# Functions to write bed files.  

import os
import sys
import argparse

import parsers
import samples_db
import cnv_struct
import validation

# This file is part of CNVAnalysisToolkit.
# 
# CNVAnalysisToolkit is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# 
# CNVAnalysisToolkit is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with CNVAnalysisToolkit.  If not, see <http://www.gnu.org/licenses/>.

__author__ = "Marc-Andre Legault (StatGen)"
__copyright__ = "Copyright (C) 2013 StatGen"
__license__ = "GNU General Public License v3 (GPL-3)"

colors = ["204,0,0", "255,68,68", "0,0,0", "153,254,0", "102,153,0"]

[docs]def main(args): """If called as a script, this script will take either a 'base_dir', the path representing a directory with the calls for every family as described in the README, or a pickle (serialized version of the family information). Either twins or parents can also be provided, depending on which bed files the user wants to generate. """ p = validation.get_parser_if_valid(args) cnvs = p.get_cnvs() family_id = p.family_id if args.base_dir: output_dir = os.path.join(args.base_dir, "{}_bed".format(args.who)) algo = args.format else: output_dir = os.path.join( os.path.split(args.pickle)[0], "{}_bed".format(args.who) ) algo = "generic" if not os.path.isdir(output_dir): os.makedirs(output_dir) if args.who == "parents": write_beds(cnvs, output_dir, family_id, False, algo) if args.who == "twins": write_beds(cnvs, output_dir, family_id, True, algo)
[docs]def write_beds(cnvs, output_dir, family_id, twins, algo): """Writes the bed files for a list of CNVs. :param cnvs: A family CNV structure. :type cnvs: dict :param output_dir: Output directory for the bed files. :type output_dir: str :param family_id: The family id for track identification (``e.g.`` 1443). :type family_id: str :param twins: Boolean indicating if we want to generate the beds for the twins or for the parents. :type twins: bool :param algo: The algorithm used to generate the CNV calls (for track identification). :type algo: str """ header_template = ("track name=\"{algo}_{family_id}_{sample_id}\" " "description=\"{algo}_{family_id}_{sample_id} ({status})\" " "visibility=2 itemRgb=\"On\"\n") line_template = "\t".join(["chr{chr}", "{start}", "{end}", "{info}", "0", "+", "{start}", "{end}", "{color}\n"]) sample_ids = samples_db.get_sample_ids_for_family(family_id) if twins: sample_filter = ("twin1", "twin2") else: sample_filter = ("mother", "father") for sample in cnvs: other_sample = None if twins: if sample == "twin1": other_sample = "twin2" else: other_sample = "twin1" if sample in sample_filter: output_file = os.path.join(output_dir, "{}.bed".format(sample)) f = open(output_file, "w") sample_id = sample_ids[sample] status = sample # Print the header f.write(header_template.format(**locals())) for chr in cnvs[sample]: for cnv in cnvs[sample][chr]: # If twin mode: check for reciprocal_overlap # the info field will have the reciprocal overlap. if twins: ro = cnv_struct.ro_to_best_in_list(cnv, cnvs[other_sample][chr]) start = cnv.start end = cnv.end cn = cnv.cn if cn < 2: col_idx = 0 elif cn == 2: col_idx = 1 else: col_idx = 2 color = colors[col_idx] if twins: info = "CN={},RO={};{}".format(cn, *ro) else: info = "CN={}".format(cn) f.write(line_template.format(**locals())) f.close()
if __name__ == "__main__": parser = argparse.ArgumentParser(description = ("Generates desired bam files " "can be called as an " "autonomous script or " "imported as a module.")) parser = validation.add_pickle_args(parser) parser.add_argument( "who", type=str, choices=["parents", "twins"], help=("Write the bed files for either the parents " "or the twins.") ) parser.add_argument( "--format", type=str, choices=parsers.__all__, help="The file format for the parser." ) parser.add_argument( "--base_dir", "-i", type=str, help=("The base directory for the calls. A directory " "with four subfodlers starting with father, " "mother, twin1 and twin2 are expected.") ) main(parser.parse_args())