/* * parameters.cpp * * Created on: Nov 11, 2009 * Author: Adam Auton * ($Revision: 249 $) */ /* NOTE(review): this copy appears to have lost its original line breaks and the text between some '<' and '>' characters (for example "numeric_limits::max()", "set::iterator" and several loop headers are incomplete). The comments added below describe only what is still visible; restore the damaged spans from the original file before building. */ // Class for reading in, checking and storing user parameters #include "parameters.h" /* Constructor. The loop over argc is incomplete in this copy (presumably it stores each argv[i] in the argv member -- TODO confirm). The remaining statements assign every option member its default value; print_params() and check_parameters() build a parameters(0, 0) object to compare current values against these defaults. */ parameters::parameters(int argc, char *argv[]) { string tmp; for (int i=0; iargv.push_back(tmp); } BED_exclude = false; BED_file = ""; derived = false; diff_discordance_matrix = false; diff_file = ""; diff_file_compressed = false; diff_indv_discordance = false; diff_site_discordance = false; diff_switch_error = false; end_pos = numeric_limits::max(); exclude_positions_file = ""; force_write_index = false; fst_window_size = -1; fst_window_step = -1; indv_exclude_file = ""; indv_keep_file = ""; invert_mask = false; keep_only_indels = false; recode_all_INFO = false; ld_bp_window_size = numeric_limits::max(); ld_snp_window_size = numeric_limits::max(); ld_bp_window_min = -1; ld_snp_window_min = -1; min_mac = -1; min_maf = -1.0; mask_file = ""; max_alleles = numeric_limits::max(); max_genotype_depth = numeric_limits::max(); max_indv_mean_depth = numeric_limits::max(); max_mac = numeric_limits::max(); max_maf = numeric_limits::max(); max_mean_depth = numeric_limits::max(); max_missing_call_count = numeric_limits::max(); max_non_ref_ac = numeric_limits::max(); max_non_ref_af = numeric_limits::max(); max_N_indv = -1; min_alleles = -1; min_genotype_depth = -1; min_genotype_quality = -1.0; min_HWE_pvalue = -1.0; min_indv_call_rate = 0; min_indv_mean_depth = -1.0; min_interSNP_distance = -1; min_kept_mask_value = 0; min_mean_depth = -1.0; min_quality = -1.0; min_r2 = -1.0; min_site_call_rate = 0; min_non_ref_ac = -1; min_non_ref_af = -1.0; output_012_matrix = false; output_as_IMPUTE = false; output_as_ldhat_phased = false; output_as_ldhat_unphased = false; output_BEAGLE_genotype_likelihoods_GL = false; output_BEAGLE_genotype_likelihoods_PL = false; output_counts = false; output_filter_summary = false; output_filtered_sites = false; output_freq = false; output_geno_depth = false; output_geno_rsq = 
false; output_hap_rsq = false; output_het = false; output_HWE = false; output_indel_hist = false; output_indv_depth = false; output_interchromosomal_hap_rsq = false; output_interchromosomal_geno_rsq = false; output_LROH = false; output_missingness = false; output_N_PCA_SNP_loadings = -1; output_PCA = false; output_prefix="out"; output_relatedness = false; output_singletons = false; output_site_depth = false; output_site_mean_depth = false; output_site_pi=false; output_site_quality = false; output_SNP_density_bin_size = 0; output_Tajima_D_bin_size = 0; output_TsTv_bin_size = 0; output_TsTv_by_count = false; output_TsTv_by_qual = false; phased_only = false; PCA_no_normalisation = false; pi_window_size = 0; pi_window_step = 0; plink_output = false; plink_tped_output = false; positions_file = ""; recode = false; recode_to_stream = false; remove_all_filtered_genotypes = false; remove_all_filtered_sites = false; remove_indels = false; snps_to_exclude_file = ""; snps_to_keep_file = ""; start_pos = -1; suppress_allele_output = false; vcf_filename=""; vcf_compressed = false; } /* read_parameters(): only the opening of this function survives in this copy. The text after "while (i" is missing up to "=argv.size())"; the surviving tail calls error() with "Requested Missing Argument" and code 76 when that condition holds and otherwise returns argv[i]. NOTE(review): that tail looks like it belongs to a separate argument-fetch helper -- confirm against the original file. */ void parameters::read_parameters() { unsigned int i=1; string in_str; while (i=argv.size()) error("Requested Missing Argument",76); return argv[i]; } /* print_params(): writes "Parameters as interpreted:" to LOG, then a line naming the matching command-line flag for each option visible here whose value differs from a default-constructed parameters object, and one line per element of each non-empty set option. NOTE(review): two loop headers in this function ("for (unsigned int ui=0; ui ...") are truncated in this copy, so some of the original output statements are missing here. */ void parameters::print_params() { parameters defaults(0, 0); LOG.printLOG("Parameters as interpreted:\n"); if (vcf_filename != defaults.vcf_filename) { if (vcf_compressed == false) LOG.printLOG("\t--vcf " + vcf_filename + "\n"); else LOG.printLOG("\t--gzvcf " + vcf_filename + "\n"); } if (chrs_to_keep.size() > 0) { for (set::iterator it=chrs_to_keep.begin(); it != chrs_to_keep.end(); ++it) { string tmp = *it; LOG.printLOG("\t--chr " + tmp + "\n"); } } if (chrs_to_exclude.size() > 0) { for (set::iterator it=chrs_to_exclude.begin(); it != chrs_to_exclude.end(); ++it) { string tmp = *it; LOG.printLOG("\t--not-chr " + tmp + "\n"); } } if (derived != defaults.derived) LOG.printLOG("\t--derived\n"); if (end_pos != defaults.end_pos) LOG.printLOG("\t--to-bp " + 
output_log::int2str(end_pos) + "\n"); if (exclude_positions_file != defaults.exclude_positions_file) LOG.printLOG("\t--exclude-positions " + exclude_positions_file + "\n"); if (force_write_index != defaults.force_write_index) LOG.printLOG("\t--force-index-write\n"); if (FORMAT_id_to_extract != defaults.FORMAT_id_to_extract) LOG.printLOG("\t--extract-FORMAT-info " + FORMAT_id_to_extract + "\n"); if (geno_rsq_position_list != defaults.geno_rsq_position_list) LOG.printLOG("\t--geno-r2-positions " + geno_rsq_position_list + "\n"); if (hap_rsq_position_list != defaults.hap_rsq_position_list) LOG.printLOG("\t--hap-r2-positions " + hap_rsq_position_list + "\n"); if (hapmap_fst_populations.size() != 0) { for (unsigned int ui=0; ui 0) for (set::iterator it=site_filter_flags_to_exclude.begin(); it != site_filter_flags_to_exclude.end(); ++it) { string tmp = *it; LOG.printLOG("\t--remove-filtered " + tmp + "\n"); } if (site_filter_flags_to_keep.size() > 0) for (set::iterator it=site_filter_flags_to_keep.begin(); it != site_filter_flags_to_keep.end(); ++it) { string tmp = *it; LOG.printLOG("\t--keep-filtered " + tmp + "\n"); } if (geno_filter_flags_to_exclude.size() > 0) for (set::iterator it=geno_filter_flags_to_exclude.begin(); it != geno_filter_flags_to_exclude.end(); ++it) { string tmp = *it; LOG.printLOG("\t--remove-filtered-geno " + tmp + "\n"); } if (INFO_to_extract.size() > 0) for (unsigned int ui=0; ui 0) for (set::iterator it=recode_INFO_to_keep.begin(); it != recode_INFO_to_keep.end(); ++it) { string tmp = *it; LOG.printLOG("\t--recode-INFO " + tmp + "\n"); } if (site_INFO_flags_to_remove.size() > 0) for (set::iterator it=site_INFO_flags_to_remove.begin(); it != site_INFO_flags_to_remove.end(); ++it) { string tmp = *it; LOG.printLOG("\t--remove-INFO " + tmp + "\n"); } if (site_INFO_flags_to_keep.size() > 0) for (set::iterator it=site_INFO_flags_to_keep.begin(); it != site_INFO_flags_to_keep.end(); ++it) { string tmp = *it; LOG.printLOG("\t--keep-INFO " + tmp + "\n*** 
Note: --keep-INFO has changed. Are you sure you don't want --recode-INFO? ***\n"); } if (BED_file != defaults.BED_file) { if (BED_exclude == false) LOG.printLOG("\t--bed " + BED_file + "\n"); else LOG.printLOG("\t--exclude-bed " + BED_file + "\n"); } if (mask_file != defaults.mask_file) { if (invert_mask == false) LOG.printLOG("\t--mask " + mask_file + "\n"); else LOG.printLOG("\t--invert-mask " + mask_file + "\n"); } if (snps_to_keep.size() > 0) for (set::iterator it=snps_to_keep.begin(); it != snps_to_keep.end(); ++it) { string tmp = *it; LOG.printLOG("\t--snp " + tmp + "\n"); } if (indv_to_keep.size() > 0) for (set::iterator it=indv_to_keep.begin(); it != indv_to_keep.end(); ++it) { string tmp = *it; LOG.printLOG("\t--indv " + tmp + "\n"); } if (indv_to_exclude.size() > 0) for (set::iterator it=indv_to_exclude.begin(); it != indv_to_exclude.end(); ++it) { string tmp = *it; LOG.printLOG("\t--remove-indv " + tmp + "\n"); } LOG.printLOG("\n"); } /* print_help(): if argv holds at most one entry, appends "--?" and calls itself again so that the help text is shown. It then scans argv; on the first entry equal to one of the help flags compared below it prints the version banner and contact details to cout and calls exit(0). Returns normally when no such entry is present. */ void parameters::print_help() { unsigned int i; string in_str; if (argv.size() <= 1) { // If there are no user parameters, display help. 
argv.push_back("--?"); print_help(); } for(i = 0; i < argv.size(); i++) { in_str = argv[i]; if ((in_str == "-h") || (in_str == "-?") || (in_str == "-help") || (in_str == "--?") || (in_str == "--help") || (in_str == "--h")) { cout << endl << "VCFtools (" << VCFTOOLS_VERSION << ")" << endl; cout << "\u00A9 Adam Auton 2009" << endl << endl; cout << "Process Variant Call Format files" << endl; cout << endl; cout << "For a list of options, please go to:" << endl; cout << "\thttp://vcftools.sourceforge.net/options.html" << endl; cout << endl; cout << "Questions, comments, and suggestions should be emailed to:" << endl; cout << "\tvcftools-help@lists.sourceforge.net" << endl; cout << endl; exit(0); } } } /* check_parameters(): validates the stored options. Each failed check calls error() with a message and an exit code, and error() terminates the process, so only the first failing check is reported. NOTE(review): the "VCF required." check passes code 0, which error() hands to exit(), so that failure ends the process with status 0. NOTE(review): the four bin/window size checks at the end only reject negative values although their messages say "> 0". */ void parameters::check_parameters() { parameters defaults(0, 0); if (vcf_filename == "") error("VCF required.", 0); if (end_pos < start_pos) error("End position must be greater than Start position.", 1); if (((end_pos != numeric_limits::max()) || (start_pos != -1)) && (chrs_to_keep.size() != 1)) error("Require a single chromosome when specifying a range.", 2); if (max_maf < min_maf) error("Maximum MAF must be not be less than Minimum MAF.", 4); if (max_mac < min_mac) error("Maximum MAC must be not be less than Minimum MAC.", 4); if (min_maf != defaults.min_maf) { if ((min_maf < 0.0) || (min_maf > 1.0)) error("MAF must be between 0 and 1.", 4); } if (max_maf != defaults.max_maf) { if ((max_maf < 0.0) || (max_maf > 1.0)) error("Maximum MAF must be between 0 and 1.", 4); } if (min_non_ref_af != defaults.min_non_ref_af) { if ((min_non_ref_af < 0.0) || (min_non_ref_af > 1.0)) error("Non-Ref Allele Frequency must be between 0 and 1.", 4); } if (max_non_ref_af < min_non_ref_af) error("Maximum Non-Ref Allele Frequency must not be less that Minimum Non-Ref AF.", 4); if (max_non_ref_ac < min_non_ref_ac) error("Maximum Non-Ref Allele Count must not be less that Minimum Non-Ref AC.", 4); if ((min_site_call_rate > 1) || (min_indv_call_rate > 1)) error("Minimum Call rates cannot be 
greater than 1.", 5); if (max_alleles < min_alleles) error("Max Number of Alleles must be greater than Min Number of Alleles.", 6); if (max_mean_depth < min_mean_depth) error("Max Mean Depth must be greater the Min Mean Depth.", 7); if (max_indv_mean_depth < min_indv_mean_depth) error("Max Indv Mean Depth must be greater the Min Indv Mean Depth.", 8); if (max_genotype_depth < min_genotype_depth) error("Max Genotype Depth must be greater than Min Genotype Depth.", 9); if (((output_as_ldhat_phased == true) || (output_as_ldhat_unphased)) && (chrs_to_keep.size() != 1)) error("Require a chromosome (--chr) when outputting LDhat format.", 11); if ((output_BEAGLE_genotype_likelihoods_GL == true) && (chrs_to_keep.size() != 1)) error("Require a chromosome (--chr) when outputting Beagle likelihoods.", 11); if ((output_BEAGLE_genotype_likelihoods_PL == true) && (chrs_to_keep.size() != 1)) error("Require a chromosome (--chr) when outputting Beagle likelihoods.", 11); if (min_kept_mask_value > 9) error("Min Mask value must be between 0 and 9.", 14); if ((output_LROH == true) && (chrs_to_keep.size() != 1)) error("Require a chromosome (--chr) when outputting LROH.", 11); if (output_TsTv_bin_size < 0) error("TsTv bin size must be > 0",16); if (output_Tajima_D_bin_size < 0) error("Tajima D bin size must be > 0", 17); if (pi_window_size < 0) error("Pi Window size must be > 0", 18); if (output_SNP_density_bin_size < 0) error("SNP density bin size must be > 0", 18); } /* error(): logs "Error: " followed by err_msg through LOG.printLOG and then calls exit(code), so it does not return to the caller. */ void parameters::error(string err_msg, int code) { LOG.printLOG("\n\nError: " + err_msg + "\n\n"); exit(code); }