Finished reading input. ++ echo CB6477/./s_6_1_sequence_read1.fastq ++ tr , ' ' + fastq_file_set1=CB6477/./s_6_1_sequence_read1.fastq ++ cd /data/maqgene/reads ++ cat CB6477/./s_6_1_sequence_read1.fastq ++ wc -l ++ cut -f 1 -d ' ' + TOTAL_LINES=172083540 ++ echo '(172083540 / 5000000) + (172083540 % 5000000 != 0)' ++ bc + num_chunks=35 + set +x make -j 14 -l 14 -C /data/maqgene/work --warn-undefined-variables -I /data/maqgene -f /data/maqgene/makefile BFA_FILE=elegans.bfa MAQGENE_GENOME_DIR=/data/maqgene/genomes reads_cksum=1524789528 map_cksum=1370449114 pileup_cksum=3903470389 cns_cksum=2093347293 umdd=50000 dmdd=1000 umid=1000 dmid=1000 max_gene_radius=50000 full_results_dir=/data/maqgene/out/example_user/CB6477_redo outfile_basename=CB6477_redo map_parameters=" -m 0.00001 -C 250 -n 2 -2 0 -a 500 -e 100 -A 500 -1 0" assemble_parameters=" -N 2 -q 0 -r 0.0 -m 7 -Q 100 -p " pileup_parameters=" -Q 100 -p -m 7 -q 0" CHUNK_SIZE=5000000 NUM_CHUNKS=35 Will process 172083540 lines of input in 35 chunks make: Entering directory `/data/maqgene/work' # Fri Apr 1 08:00:32 BST 2011: Removing frontend files in case this is a duplicate run ... rm -f /data/maqgene/out/example_user/CB6477_redo/CB6477_redo_{grouped.txt,flat.txt} rm -f /data/maqgene/out/example_user/CB6477_redo/CB6477_redo_uncovered.txt /data/maqgene/out/example_user/CB6477_redo/CB6477_redo_coverage.txt /data/maqgene/out/example_user/CB6477_redo/CB6477_redo_pileup.txt /data/maqgene/out/example_user/CB6477_redo/CB6477_redo_log.txt /data/maqgene/out/example_user/CB6477_redo/CB6477_redo_check.txt /data/maqgene/out/example_user/CB6477_redo/CB6477_redo_unmapped.txt make: Leaving directory `/data/maqgene/work' make: Entering directory `/data/maqgene/work' # Fri Apr 1 08:00:32 BST 2011: Extracting point mutants from consensus ... /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "drop table if exists 2093347293_snps; create table 2093347293_snps ( id int(10) NOT NULL auto_increment, dna char(40) NOT NULL, start int(10) NOT NULL, end int(10) NOT NULL, indel_size int(5) NOT NULL, variant_type enum('indel', 'point') NOT NULL, ref_base char NOT NULL, sample_base char NOT NULL, snp_score int(5) NOT NULL, read_depth int(5) NOT NULL, loci_multiplicity double(5,2) NOT NULL, mapping_quality int(5) NOT NULL, neighbor_quality int(5) NOT NULL, primary key (id), unique key (dna, start), key (indel_size) )" /data/maqgene/bin/maq cns2snp 2093347293.cns \ | cat -b | /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "load data local infile '/dev/stdin' into table 2093347293_snps (id, dna, @start, ref_base, sample_base, snp_score, read_depth, loci_multiplicity, mapping_quality, neighbor_quality) set start = @start - 1, end = @start, indel_size = 0, variant_type = 'point'; flush table 2093347293_snps;" # Fri Apr 1 08:00:52 BST 2011: Extracting indels from consensus ... /data/maqgene/bin/maq indelsoa /data/maqgene/genomes/elegans.bfa 1370449114.map \ | awk '{ if ($5+$6-$4 >= 5 && $4 <= 1) { print $0 } }' \ | /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "load data local infile '/dev/stdin' into table 2093347293_snps (dna, @start, indel_size, @num_spanning_reads, @num_left_reads, @num_right_reads, @junk) set id = NULL, start = @start - 1, end = @start - 1 + if (indel_size > 0, 0, -indel_size), ref_base = 'X', sample_base = 'X', variant_type = 'indel', snp_score = -1000, read_depth = if (@num_left_reads < @num_right_reads, @num_left_reads, @num_right_reads), loci_multiplicity = -1000, mapping_quality = -1000, neighbor_quality = -1000; flush table 2093347293_snps;" # Fri Apr 1 08:03:56 BST 2011: Adding placeholders for known SNPs. cat 3903470389_known_snps \ | /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "load data local infile '/dev/stdin' ignore into table 2093347293_snps (dna, @start, ref_base, read_depth, @junk) set id = NULL, start = @start - 1, end = @start - 1, variant_type = 'point', snp_score = -1000, loci_multiplicity = -1000, mapping_quality = -1000, neighbor_quality = -1000; flush table 2093347293_snps;" Found 161991 variants. touch 2093347293_snps # Fri Apr 1 08:03:59 BST 2011: Linking backend file 1370449114_unmapped.txt to /data/maqgene/out/example_user/CB6477_redo/CB6477_redo_unmapped.txt ln -fs /data/maqgene/work/1370449114_unmapped.txt /data/maqgene/out/example_user/CB6477_redo/CB6477_redo_unmapped.txt # Fri Apr 1 08:03:59 BST 2011: Getting masking regions /data/maqgene/bin/associate_regions -b -1 -c -1000000 -m 2 -o >(/usr/bin/mysql -Dmaqgenedb -umaqgene --skip-column-names --batch --raw --local-infile -e "drop table if exists 2093347293_masked_ids; create table 2093347293_masked_ids (query_region_id int(10) NOT NULL, primary key (query_region_id)); load data local infile '/dev/stdin' ignore into table 2093347293_masked_ids (@j1, query_region_id, @j3, @j4, @j5, @j6, @j7)") -d /data/maqgene/genomes/elegans.dnas -p /data/maqgene/genomes -q <(/usr/bin/mysql -Dmaqgenedb -umaqgene --skip-column-names --batch --raw --local-infile -e "select id, 'elegans', dna, start, end, '+' from 2093347293_snps") -t <(/usr/bin/mysql -Dmaqgenedb -umaqgene --skip-column-names --batch --raw --local-infile -e "select id,'elegans',dna,start,end,strand from elegans_features feature where feature.feature not in ('mRNA', 'intron')") 1>/dev/null; touch 2093347293_masked_ids # Fri Apr 1 08:04:03 BST 2011: Finding all genomic features overlapping variants... /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "drop table if exists 2093347293_rel_snps; CREATE TABLE 2093347293_rel_snps ( association_id INT(10) NOT NULL, query_region_id INT(10) unsigned NOT NULL, target_region_id INT(10) NOT NULL, distance INT(10) NOT NULL, overlap INT(10) NOT NULL, same_strand enum('SAME', 'OPP') NOT NULL, num_regions_between INT(3) NOT NULL, PRIMARY KEY (association_id), KEY (query_region_id), KEY (target_region_id) )" /data/maqgene/bin/associate_regions -b -1 -c -1000000 -m 0 -o >(/usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "load data local infile '/dev/stdin' into table 2093347293_rel_snps; flush table 2093347293_rel_snps") -d /data/maqgene/genomes/elegans.dnas -p /data/maqgene/genomes -q <(/usr/bin/mysql -Dmaqgenedb -umaqgene --skip-column-names --batch --raw --local-infile -e "select id, 'elegans', dna, start, end, '+' from 2093347293_snps") -t <(/usr/bin/mysql -Dmaqgenedb -umaqgene --skip-column-names --batch --raw --local-infile -e "select id, 'elegans', dna,start,end,strand from elegans_features where feature in ('SNP','exon','five_prime_UTR','intron','mRNA','three_prime_UTR','ncRNA')") 1>/dev/null touch 2093347293_rel_snps # Fri Apr 1 08:04:09 BST 2011: Loading uncovered regions into table ... cat 3903470389_uncovered.txt \ | /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "drop table if exists 3903470389_uncovered; create table 3903470389_uncovered ( id int(10) NOT NULL auto_increment, dna char(40) NOT NULL, start int(10) NOT NULL, end int(10) NOT NULL, primary key (id) ) auto_increment = 161992; load data local infile '/dev/stdin' into table 3903470389_uncovered (dna, start, end)" Uncovered region statistics: chromosome number_uncovered_regions total_uncovered_length I 11 1230 II 21 2650 III 15 1670 IV 17 1686 V 17 3096 X 19 6870 touch 3903470389_uncovered # Fri Apr 1 08:04:09 BST 2011: Getting intergenic regions /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "drop table if exists 2093347293_rel_intergenic; create table 2093347293_rel_intergenic like 2093347293_rel_snps"; /data/maqgene/bin/associate_regions -b 50000 -c -1000000 -m 2 -o >(/usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "load data local infile '/dev/stdin' into table 2093347293_rel_intergenic; flush table 2093347293_rel_intergenic") -d /data/maqgene/genomes/elegans.dnas -p /data/maqgene/genomes -q <(/usr/bin/mysql -Dmaqgenedb -umaqgene --skip-column-names --batch --raw --local-infile -e "select id, 'elegans', dna, start, end, '+' from 2093347293_snps") -t <(/usr/bin/mysql -Dmaqgenedb -umaqgene --skip-column-names --batch --raw --local-infile -e "select id, 'elegans',dna,start,end,strand from elegans_features feature where feature.feature = 'mRNA'") 1>/dev/null; # Fri Apr 1 08:04:14 BST 2011: Filtering masked intergenic regions /usr/bin/mysql -Dmaqgenedb -umaqgene --skip-column-names --batch --raw --local-infile -e "delete i.* from 2093347293_rel_intergenic i join 2093347293_masked_ids m using (query_region_id)" touch 2093347293_rel_intergenic # Fri Apr 1 08:04:19 BST 2011: Finding mRNA features overlapping uncovered regions ... /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "drop table if exists 3903470389_uncovered_rel; CREATE TABLE 3903470389_uncovered_rel ( association_id INT(10) NOT NULL, query_region_id INT(10) unsigned NOT NULL, target_region_id INT(10) NOT NULL, distance INT(10) NOT NULL, overlap INT(10) NOT NULL, same_strand enum('SAME', 'OPP') NOT NULL, num_regions_between INT(3) NOT NULL, PRIMARY KEY (association_id), KEY (query_region_id), KEY (target_region_id) )" /data/maqgene/bin/associate_regions -b -1 -c -1000000 -m 0 -o >(/usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "load data local infile '/dev/stdin' into table 3903470389_uncovered_rel; flush table 3903470389_uncovered_rel") -d /data/maqgene/genomes/elegans.dnas -p /data/maqgene/genomes -q <(/usr/bin/mysql -Dmaqgenedb -umaqgene --skip-column-names --batch --raw --local-infile -e "select id, 'elegans', dna, start, end, '+' from 3903470389_uncovered") -t <(/usr/bin/mysql -Dmaqgenedb -umaqgene --skip-column-names --batch --raw --local-infile -e "select id, 'elegans', dna,start,end,strand from elegans_features where feature in ('SNP','exon','five_prime_UTR','intron','mRNA','three_prime_UTR','ncRNA')") 1>/dev/null touch 3903470389_uncovered_rel # Fri Apr 1 08:04:21 BST 2011: Classifying intergenic relations # Fri Apr 1 08:04:21 BST 2011: Calculating translation start offsets... # Fri Apr 1 08:04:21 BST 2011: Linking backend file 1370449114_check.txt to /data/maqgene/out/example_user/CB6477_redo/CB6477_redo_check.txt /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "drop table if exists 2093347293_intergenic_assoc; create table 2093347293_intergenic_assoc ( id INT(10) NOT NULL, target_region_id INT(10) NOT NULL, relation enum ('upstream', 'downstream', 'into') NOT NULL, distance INT(10) NOT NULL, num_regions_between INT(3) NOT NULL, attribute varchar(50) NOT NULL, primary key (id, target_region_id), key (id, attribute) ); insert into 2093347293_intergenic_assoc select snp.id, rel.target_region_id, if (rel.overlap > 0, 'into', if (feature.strand = '+', if (snp.end < feature.start, 'upstream', 'downstream'), if (feature.end < snp.start, 'upstream', 'downstream') ) ) relation, rel.distance, rel.num_regions_between, feature.attribute from 2093347293_snps snp join 2093347293_rel_intergenic rel on (snp.id = rel.query_region_id) join elegans_features feature on (rel.target_region_id = feature.id)" /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "drop table if exists 2093347293_offsets_snps; create table 2093347293_offsets_snps ( id int(10) NOT NULL, gene varchar(50) NOT NULL, 5prime_offset int(10) NOT NULL, 3prime_offset int(10) NOT NULL, total_length int(10) NOT NULL, boundary_type char(20) NOT NULL, primary key (id, gene, boundary_type) );" ln -fs /data/maqgene/work/1370449114_check.txt /data/maqgene/out/example_user/CB6477_redo/CB6477_redo_check.txt # Fri Apr 1 08:04:21 BST 2011: Calculating translation start offsets for uncovered regions... /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "drop table if exists 3903470389_offsets_uncovered; create table 3903470389_offsets_uncovered ( id int(10) NOT NULL, gene varchar(50) NOT NULL, 5prime_offset int(10) NOT NULL, 3prime_offset int(10) NOT NULL, total_length int(10) NOT NULL, boundary_type char(20) NOT NULL, primary key (id, gene, boundary_type) );" /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "insert into 2093347293_offsets_snps select reg.id, gene.attribute gene, if(exon.strand = '+', if(reg.start < min(exon.start), reg.start - min(exon.start), sum(if(reg.start < exon.end, reg.start, exon.end) - if(reg.start < exon.start, reg.start, exon.start))), if(max(exon.end) < reg.start, max(exon.end) - reg.start, sum(if(exon.end < reg.start, reg.start, exon.end) - if(exon.start < reg.start, reg.start, exon.start))) ), if(exon.strand = '+', if(max(exon.end) < reg.start, max(exon.end) - reg.start, sum(if(exon.end < reg.start, reg.start, exon.end) - if(exon.start < reg.start, reg.start, exon.start))), if(reg.start < min(exon.start), reg.start - min(exon.start), sum(if(reg.start < exon.end, reg.start, exon.end) - if(reg.start < exon.start, reg.start, exon.start)))), sum(exon.end - exon.start) total_length, 'start' boundary_type from 2093347293_snps reg join 2093347293_rel_snps rel on (reg.id = rel.query_region_id) join elegans_features gene on (rel.target_region_id = gene.id) join elegans_features exon on (exon.attribute = gene.attribute) where exon.feature = 'exon' and gene.feature = 'mRNA' group by reg.id, gene.attribute" /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "insert into 3903470389_offsets_uncovered select reg.id, gene.attribute gene, if(exon.strand = '+', if(reg.start < min(exon.start), reg.start - min(exon.start), sum(if(reg.start < exon.end, reg.start, exon.end) - if(reg.start < exon.start, reg.start, exon.start))), if(max(exon.end) < reg.start, max(exon.end) - reg.start, sum(if(exon.end < reg.start, reg.start, exon.end) - if(exon.start < reg.start, reg.start, exon.start))) ), if(exon.strand = '+', if(max(exon.end) < reg.start, max(exon.end) - reg.start, sum(if(exon.end < reg.start, reg.start, exon.end) - if(exon.start < reg.start, reg.start, exon.start))), if(reg.start < min(exon.start), reg.start - min(exon.start), sum(if(reg.start < exon.end, reg.start, exon.end) - if(reg.start < exon.start, reg.start, exon.start)))), sum(exon.end - exon.start) total_length, 'start' boundary_type from 3903470389_uncovered reg join 3903470389_uncovered_rel rel on (reg.id = rel.query_region_id) join elegans_features gene on (rel.target_region_id = gene.id) join elegans_features exon on (exon.attribute = gene.attribute) where exon.feature = 'exon' and gene.feature = 'mRNA' group by reg.id, gene.attribute" /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "insert into 3903470389_offsets_uncovered select reg.id, gene.attribute gene, if(exon.strand = '+', if(reg.end < min(exon.start), reg.end - min(exon.start), sum(if(reg.end < exon.end, reg.end, exon.end) - if(reg.end < exon.start, reg.end, exon.start))), if(max(exon.end) < reg.end, max(exon.end) - reg.end, sum(if(exon.end < reg.end, reg.end, exon.end) - if(exon.start < reg.end, reg.end, exon.start))) ), if(exon.strand = '+', if(max(exon.end) < reg.end, max(exon.end) - reg.end, sum(if(exon.end < reg.end, reg.end, exon.end) - if(exon.start < reg.end, reg.end, exon.start))), if(reg.end < min(exon.start), reg.end - min(exon.start), sum(if(reg.end < exon.end, reg.end, exon.end) - if(reg.end < exon.start, reg.end, exon.start)))), sum(exon.end - exon.start) total_length, 'end' boundary_type from 3903470389_uncovered reg join 3903470389_uncovered_rel rel on (reg.id = rel.query_region_id) join elegans_features gene on (rel.target_region_id = gene.id) join elegans_features exon on (exon.attribute = gene.attribute) where exon.feature = 'exon' and gene.feature = 'mRNA' group by reg.id, gene.attribute" touch 3903470389_offsets_uncovered # Fri Apr 1 08:04:23 BST 2011: Linking backend file 3903470389_pileup.txt to /data/maqgene/out/example_user/CB6477_redo/CB6477_redo_pileup.txt ln -fs /data/maqgene/work/3903470389_pileup.txt /data/maqgene/out/example_user/CB6477_redo/CB6477_redo_pileup.txt # Fri Apr 1 08:04:23 BST 2011: Linking backend file 3903470389_coverage.txt to /data/maqgene/out/example_user/CB6477_redo/CB6477_redo_coverage.txt ln -fs /data/maqgene/work/3903470389_coverage.txt /data/maqgene/out/example_user/CB6477_redo/CB6477_redo_coverage.txt # Fri Apr 1 08:04:23 BST 2011: Linking backend file 2093347293_log.txt to /data/maqgene/out/example_user/CB6477_redo/CB6477_redo_log.txt ln -fs /data/maqgene/work/2093347293_log.txt /data/maqgene/out/example_user/CB6477_redo/CB6477_redo_log.txt # Fri Apr 1 08:04:23 BST 2011: Linking backend file 3903470389_uncovered.txt to /data/maqgene/out/example_user/CB6477_redo/CB6477_redo_uncovered.txt ln -fs /data/maqgene/work/3903470389_uncovered.txt /data/maqgene/out/example_user/CB6477_redo/CB6477_redo_uncovered.txt # Fri Apr 1 08:04:23 BST 2011: Filtering and loading pileup for analysis ... /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "drop table if exists 3903470389_pileup; create table 3903470389_pileup ( dna enum('I','II','III','IV','MtDNA','V','X') NOT NULL, start int(10) NOT NULL, ref_base char(1) NOT NULL, read_depth int(5) NOT NULL, sample_reads varchar(100) NOT NULL, A_fwd int(5) NOT NULL, A_rev int(5) NOT NULL, C_fwd int(5) NOT NULL, C_rev int(5) NOT NULL, G_fwd int(5) NOT NULL, G_rev int(5) NOT NULL, T_fwd int(5) NOT NULL, T_rev int(5) NOT NULL, wt_fwd int(5) NOT NULL, wt_rev int(5) NOT NULL, primary key (dna, start) );" cat 3903470389_pileup.txt \ | /data/maqgene/bin/filter_maq_pileup 'AaCcGgTt,.' 2 \ | awk 'BEGIN { OFS="\t" } { $5=substr($5,1,100); print $0 }' \ | /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "load data local infile '/dev/stdin' into table 3903470389_pileup (dna, @start, ref_base, read_depth, sample_reads, A_fwd, A_rev, C_fwd, C_rev, G_fwd, G_rev, T_fwd, T_rev, wt_fwd, wt_rev) set start = @start - 1; flush table 3903470389_pileup" touch 2093347293_intergenic_assoc touch 2093347293_offsets_snps cat 3903470389_known_snps \ | awk 'BEGIN { OFS="\t" } { $5=substr($5,1,100); print $0 }' \ | /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "load data local infile '/dev/stdin' into table 3903470389_pileup (dna, @start, ref_base, read_depth, sample_reads, A_fwd, A_rev, C_fwd, C_rev, G_fwd, G_rev, T_fwd, T_rev, wt_fwd, wt_rev) set start = @start - 1; flush table 3903470389_pileup" touch 3903470389_pileup # Fri Apr 1 08:06:11 BST 2011: Getting codons affected by point mutations... /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "drop table if exists 2093347293_codons; create table 2093347293_codons ( id int(10) NOT NULL, gene varchar(50) NOT NULL, ref_codon char(4) NOT NULL, sam_codon char(4) NOT NULL, start char(1) NOT NULL, primary key (id, gene) ); insert into 2093347293_codons select snp.id, off.gene, @ref_codon := substr(gene_dna.sequence, (floor(if(feature.strand = '+', off.5prime_offset, off.5prime_offset - 1 ) / 3 ) * 3 ) + 1, 3 ) ref_codon, insert(@ref_codon, (if(feature.strand = '+', off.5prime_offset, off.5prime_offset - 1) % 3) + 1, 1, if(feature.strand = '+', snp.sample_base, substr('ACGTTGCA', instr('ACGTTGCA', snp.sample_base)+4, 1) ) ) sam_codon, if((feature.strand = '+' && off.5prime_offset < 3) || (feature.strand = '-' && off.5prime_offset - 1 < 3), 'Y', 'N') start from 2093347293_offsets_snps off join 2093347293_snps snp on (off.id = snp.id) join elegans_coding_dna gene_dna using (gene) join elegans_features feature on (gene_dna.gene = feature.attribute and feature.start <= snp.start and feature.end >= snp.end) where snp.variant_type = 'point' and feature.feature = 'exon' and off.boundary_type = 'start' and off.5prime_offset >= 0 and off.3prime_offset >= 0;" touch 2093347293_codons # Fri Apr 1 08:06:21 BST 2011: Writing snp read counts ... /usr/bin/mysql -Dmaqgenedb -umaqgene --skip-column-names --batch --raw --local-infile --column-names -e "select f.id, f.dna, f.start + 1 start, e.snp_name, e.snp_base, if(e.snp_base = 'a', p.A_fwd + p.A_rev, if (e.snp_base = 'c', p.C_fwd + p.C_rev, if (e.snp_base = 'g', p.G_fwd + p.G_rev, if (e.snp_base = 't', p.T_fwd + p.T_rev, -1)))) snp_reads, p.ref_base, p.wt_fwd + p.wt_rev wt_reads, p.read_depth, p.sample_reads from elegans_features f join elegans_snp_changes e on (f.attribute = e.snp_name) join 3903470389_pileup p using (dna,start)" > /data/maqgene/out/example_user/CB6477_redo/CB6477_redo_snp_read_counts.txt 0 snp read count lines written. /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "drop table if exists 2093347293_marked; create table 2093347293_marked ( id int(10) NOT NULL, class varchar(100) NOT NULL, description varchar(100) NOT NULL, parent_feature varchar(100) NOT NULL, primary key (id, class, parent_feature) );" # Fri Apr 1 08:06:21 BST 2011: Finding exonic indels ... /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "insert into 2093347293_marked select snp.id, if (snp.indel_size < 0, if (rel.overlap % 3 = 0, 'inframe', 'frameshift'), if (snp.indel_size % 3 = 0, 'inframe', 'frameshift')) class, if (rel.overlap != snp.end - snp.start, 'exon_boundary', 'none') description, feature.attribute parent_feature from 2093347293_snps snp join 2093347293_rel_snps rel on (snp.id = rel.query_region_id) join elegans_features feature on (rel.target_region_id = feature.id) where feature.feature = 'exon' and snp.variant_type = 'indel';" # Fri Apr 1 08:06:21 BST 2011: Finding coding variants ... /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "insert into 2093347293_marked select trans.id, if (reference_codons.amino = sample_codons.amino, 'silent', if (reference_codons.amino3 = 'stop', 'readthrough', if (sample_codons.amino3 = 'stop', 'premature_stop', if (trans.start = 'Y' and sample_codons.is_start != 'Y', 'non_start', 'missense') ) ) ) class, concat(ref_codon,'->',sam_codon,'[', reference_codons.amino3,'->',sample_codons.amino3,']') description, trans.gene from 2093347293_codons trans join elegans_genetic_code reference_codons on (trans.ref_codon = reference_codons.codon) join elegans_genetic_code sample_codons on (trans.sam_codon = sample_codons.codon);" # Fri Apr 1 08:06:22 BST 2011: Finding noncoding variants ... /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "insert into 2093347293_marked select snp.id, feature.feature class, 'none' description, feature.attribute parent_feature from 2093347293_snps snp join 2093347293_rel_snps rel on (snp.id = rel.query_region_id) join elegans_features feature on (rel.target_region_id = feature.id) where feature.feature not in ('exon', 'intron', 'mRNA');" # Fri Apr 1 08:06:23 BST 2011: Finding splice site variants ... /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "insert into 2093347293_marked select snp.id, if (feature.strand = '+', if (snp.start - feature.start < 2, 'splice_donor', 'splice_acceptor'), if (snp.start - feature.start < 2, 'splice_acceptor', 'splice_donor')) class, 'none' description, feature.attribute parent_feature from 2093347293_snps snp join 2093347293_rel_snps rel on (snp.id = rel.query_region_id) join elegans_features feature on (rel.target_region_id = feature.id) where feature.feature = 'intron' and rel.distance > -2;" # Fri Apr 1 08:06:23 BST 2011: Finding intergenic and intronic variants ... /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "insert into 2093347293_marked select id, 'nongenic' class, concat(if (relation in ('upstream','into'), - distance, distance),' ',relation) description, attribute from 2093347293_intergenic_assoc where ((relation = 'upstream' and num_regions_between = 0 and distance < 50000) or (relation = 'upstream' and num_regions_between < 2 and distance < 1000) or (relation = 'downstream' and num_regions_between = 0 and distance < 1000) or (relation = 'downstream' and num_regions_between < 2 and distance < 1000) or relation = 'into' );" # Fri Apr 1 08:06:27 BST 2011: Finding uncovered regions in genes ... /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "insert into 2093347293_marked select b.id, 'uncovered', if(b.5prime_offset = e.5prime_offset, concat('non-exonic, ',unc.start - gene.start,' to ',unc.end - gene.start,' bp into'), concat( if(b.5prime_offset < 0, concat(-b.5prime_offset, ' bp upstream'), if(b.3prime_offset < 0, concat(-b.3prime_offset, ' bp downstream'), concat('codon ',floor(b.5prime_offset / 3) + 1) ) ), ' to ', if(e.3prime_offset < 0, concat(-e.3prime_offset, ' bp downstream'), if(e.5prime_offset < 0, concat(-e.5prime_offset, ' bp upstream'), concat('codon ',ceiling(e.5prime_offset / 3) + 1) ) ), ' (', format(100 * (if(e.5prime_offset < 0,0,e.5prime_offset) - (if(b.5prime_offset < 0,0,b.5prime_offset)) ) / b.total_length,0 ), ' % of ', floor(b.total_length/3), ' codons)' ) ) description, b.gene from 3903470389_uncovered unc join 3903470389_offsets_uncovered b using (id) join 3903470389_offsets_uncovered e using (id, gene) join elegans_features gene on (e.gene = gene.attribute) where b.boundary_type = 'start' and e.boundary_type = 'end' and gene.strand = '+' and gene.feature = 'mRNA' union select b.id, 'uncovered', if(b.5prime_offset = e.5prime_offset, concat('non-exonic, ',gene.end - unc.end,' to ',gene.end - unc.start,' bp into'), concat( if(e.5prime_offset < 0, concat(-e.5prime_offset, ' bp upstream'), if(e.3prime_offset < 0, concat(-e.3prime_offset, ' bp downstream'), concat('codon ',floor(e.5prime_offset / 3) + 1) ) ), ' to ', if(b.3prime_offset < 0, concat(-b.3prime_offset, ' bp downstream'), if(b.5prime_offset < 0, concat(-b.5prime_offset, ' bp upstream'), concat('codon ',ceiling(b.5prime_offset / 3) + 1) ) ), ' (', format(100 * (if(b.5prime_offset < 0,0,b.5prime_offset) - (if(e.5prime_offset < 0,0,e.5prime_offset)) ) / b.total_length,0 ), ' % of ', floor(b.total_length/3), ' codons)' ) ) description, b.gene from 3903470389_uncovered unc join 3903470389_offsets_uncovered b using (id) join 3903470389_offsets_uncovered e using (id, gene) join elegans_features gene on (e.gene = gene.attribute) where b.boundary_type = 'start' and e.boundary_type = 'end' and gene.strand = '-' and gene.feature = 'mRNA'" # Fri Apr 1 08:06:27 BST 2011: Variants found: +-----------------+-----------------+ | class | number_variants | +-----------------+-----------------+ | five_prime_UTR | 1298 | | frameshift | 65 | | inframe | 20 | | missense | 6310 | | ncRNA | 883 | | nongenic | 125193 | | non_start | 10 | | premature_stop | 694 | | readthrough | 19 | | silent | 7485 | | SNP | 1032 | | splice_acceptor | 47 | | splice_donor | 50 | | three_prime_UTR | 3568 | | uncovered | 30 | +-----------------+-----------------+ touch 2093347293_marked # Fri Apr 1 08:06:28 BST 2011: Combining results ... /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "drop table if exists 2093347293_combined; create table 2093347293_combined (primary key (id, class, description), key (dna, start)) as select snp.id, 'CB6477_redo' mutant_strain, snp.dna, snp.start + 1 start, snp.end - snp.start length, snp.ref_base reference_base, snp.sample_base sample_base, snp.snp_score consensus_score, snp.loci_multiplicity, snp.mapping_quality, snp.neighbor_quality, (pile.wt_fwd + pile.wt_rev) number_wildtype_reads, (snp.read_depth - (pile.wt_fwd + pile.wt_rev)) number_variant_reads, snp.read_depth sequencing_depth, pile.sample_reads, snp.variant_type, snp.indel_size, class.class, class.description, group_concat(class.parent_feature order by class.parent_feature) parent_features from 2093347293_snps snp join 3903470389_pileup pile using (dna,start) join 2093347293_marked class on (snp.id = class.id) left join elegans_per_locus using (dna,start) where (snp.variant_type = 'point' and (snp.snp_score >= 3 and snp.loci_multiplicity <= 10 and snp.neighbor_quality >= 0 and pile.read_depth >= 4 and ((pile.read_depth - pile.wt_fwd - pile.wt_rev) / pile.read_depth) >= 0.5) or snp.variant_type = 'indel' ) group by id, class, description union select unc.id id, 'CB6477_redo' mutant_strain, unc.dna, unc.start + 1 start, unc.end - unc.start length, '-' reference_base, '-' sample_base, 0 consensus_score, 0, 0, 0, 0 number_wildtype_reads, 0 number_variant_reads, 0 sequencing_depth, '' sample_reads, 'uncovered', 0, class.class, class.description, group_concat(class.parent_feature order by class.parent_feature) parent_features from 3903470389_uncovered unc join 2093347293_marked class on (unc.id = class.id) left join elegans_per_locus using (dna,start) group by id, class, description; alter table 2093347293_combined order by dna, start, length" touch 2093347293_combined # Fri Apr 1 08:06:30 BST 2011: Writing results to grouped file ... /usr/bin/mysql -Dmaqgenedb -umaqgene --skip-column-names --batch --raw --local-infile --column-names -e "select id, mutant_strain, dna, start, length, reference_base, sample_base, consensus_score, loci_multiplicity, mapping_quality, neighbor_quality, number_wildtype_reads, number_variant_reads, sequencing_depth, sample_reads, variant_type, indel_size, group_concat(class) classes, group_concat(description) descriptions, group_concat(concat('{',parent_features,'}')) parent_features from 2093347293_combined group by id order by dna, start;" > /data/maqgene/out/example_user/CB6477_redo/CB6477_redo_grouped.txt 2822 lines written. # Fri Apr 1 08:06:30 BST 2011: Writing results to flat file ... /usr/bin/mysql -Dmaqgenedb -umaqgene --skip-column-names --batch --raw --local-infile --column-names -e "select * from 2093347293_combined" > /data/maqgene/out/example_user/CB6477_redo/CB6477_redo_flat.txt 4748 lines written. make: Leaving directory `/data/maqgene/work' make: Entering directory `/data/maqgene/work' # Fri Apr 1 08:06:31 BST 2011: Cleaning intermediate data ... /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "drop table if exists 2093347293_snps,2093347293_rel,2093347293_masked_ids,2093347293_rel_intergenic,2093347293_intergenic_assoc,2093347293_offsets,2093347293_codons,2093347293_marked,2093347293_combined,3903470389_pileup" rm -f 2093347293_snps 2093347293_rel 2093347293_masked_ids 2093347293_rel_intergenic 2093347293_intergenic_assoc 2093347293_offsets 2093347293_codons 2093347293_marked 2093347293_combined 3903470389_pileup make: Leaving directory `/data/maqgene/work'