# This is the second part of example configuration files for performing
# efficient data clean up. All commented out parameters are those that are used
# by default.

# The input file should be the output file of the second sample missingness
# step (which is the one that has been used by any of these scripts):
#     - sex_check
#     - find_related_samples
#     - check_ethnicity
#     - plate_bias

# Note that the final usable dataset is the one located in the directory where
# "remove_heterozygous_haploid" was run (which is the one that has been used by
# any of these scripts):
#     - flag_maf_zero
#     - flag_hw
# Hence, if you want to remove the flagged markers, you should use
# pyGenClean_subset_data on markers in the "flag_maf_zero" and "flag_hw"
# directories using the PLINK binary file located in
# "remove_heterozygous_haploid".

[11]
# ##############################################################################
# After manually checking that everything went fine in the previous steps, you
# need to create a list of samples to remove from steps [7] to [10] and a list
# of markers to exclude from step [6]. Just create a file containing family and
# individual identification numbers for all those samples to remove.
# ##############################################################################
script = subset
remove = /PATH/TO/FILE/CONTAINING/ALL_SAMPLES_FROM_PREVIOUS_STEPS_TO_REMOVE.txt
exclude = /PATH/TO/FILE/CONTAINING/ALL_MARKERS_FROM_PREVIOUS_STEPS_TO_EXCLUDE.txt

[12]
# ##############################################################################
# Removes heterozygous haploid genotypes from the dataset.
# ##############################################################################
script = remove_heterozygous_haploid

[13]
# ##############################################################################
# Flags uninformative markers (with a MAF of 0). This step only flags markers.
# You might want to exclude them later on.
# ##############################################################################
script = flag_maf_zero

[14]
# ##############################################################################
# Flags markers that fail HWE test for a p value of 1e-4 and after Bonferroni
# correction. This step only flags markers. You might want to exclude them later
# on.
# ##############################################################################
script = flag_hw
# hwe = 1e-4