Finished reading input. ++ echo ot340/./ot340_chrI_11m_12m_read1.fastq ++ tr , ' ' + fastq_file_set1=ot340/./ot340_chrI_11m_12m_read1.fastq ++ cd /data/maqgene/reads ++ cat ot340/./ot340_chrI_11m_12m_read1.fastq ++ wc -l ++ cut -f 1 -d ' ' + TOTAL_LINES=478592 ++ echo '(478592 / 5000000) + (478592 % 5000000 != 0)' ++ bc + num_chunks=1 + set +x make -j 14 -l 14 -C /data/maqgene/work --warn-undefined-variables -I /data/maqgene -f /data/maqgene/makefile BFA_FILE=elegans.bfa MAQGENE_GENOME_DIR=/data/maqgene/genomes reads_cksum=3039536873 map_cksum=2393555056 pileup_cksum=1678059181 cns_cksum=1192972776 umdd=50000 dmdd=1000 umid=1000 dmid=1000 max_gene_radius=50000 full_results_dir=/data/maqgene/out/example_user/ot340 outfile_basename=ot340 map_parameters=" -m 0.00001 -C 250 -n 2 -2 0 -a 500 -e 100 -A 500 -1 0" assemble_parameters=" -N 2 -q 0 -r 0.0 -m 7 -Q 100 -p " pileup_parameters=" -Q 100 -p -m 7 -q 0" CHUNK_SIZE=5000000 NUM_CHUNKS=1 Will process 478592 lines of input in 1 chunks make: Entering directory `/data/maqgene/work' # Thu Feb 3 15:34:38 GMT 2011: Removing frontend files in case this is a duplicate run ... rm -f /data/maqgene/out/example_user/ot340/ot340_{grouped.txt,flat.txt} rm -f /data/maqgene/out/example_user/ot340/ot340_uncovered.txt /data/maqgene/out/example_user/ot340/ot340_coverage.txt /data/maqgene/out/example_user/ot340/ot340_pileup.txt /data/maqgene/out/example_user/ot340/ot340_log.txt /data/maqgene/out/example_user/ot340/ot340_check.txt /data/maqgene/out/example_user/ot340/ot340_unmapped.txt make: Leaving directory `/data/maqgene/work' make: Entering directory `/data/maqgene/work' /data/maqgene/makefile:76: warning: undefined variable `date' /data/maqgene/makefile:78: warning: undefined variable `date' # : Regrouping fastq reads into chunks of size 5000000. split -l 5000000 -a 5 -d <(cat /data/maqgene/reads/ot340/./ot340_chrI_11m_12m_read1.fastq) 2393555056.1.fastq. # : Regrouping fastq reads into chunks of size 5000000. split -l 5000000 -a 5 -d <(cat /data/maqgene/reads/ot340/./ot340_chrI_11m_12m_read2.fastq) 2393555056.2.fastq. for stem in 00000; do mv 2393555056.2.fastq.$stem 2393555056.$stem.2.fastq; done for stem in 00000; do mv 2393555056.1.fastq.$stem 2393555056.$stem.1.fastq; done touch 3039536873_split2 touch 3039536873_split1 # Thu Feb 3 15:34:38 GMT 2011: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 2393555056.00000.2.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 2393555056.00000.2.bfq -- finish writing file '2393555056.00000.2.bfq' -- 119648 sequences were loaded. # Thu Feb 3 15:34:39 GMT 2011: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 2393555056.00000.1.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 2393555056.00000.1.bfq -- finish writing file '2393555056.00000.1.bfq' -- 119648 sequences were loaded. # Thu Feb 3 15:34:40 GMT 2011: Mapping file(s) 2393555056.00000.1.bfq 2393555056.00000.2.bfq /data/maqgene/bin/maq map -m 0.00001 -C 250 -n 2 -2 0 -a 500 -e 100 -A 500 -1 0 -u 2393555056.00000.unmapped -H 2393555056.00000.mismatch \ 2393555056.00000.map /data/maqgene/genomes/elegans.bfa 2393555056.00000.1.bfq 2393555056.00000.2.bfq 2> /dev/null # Thu Feb 3 15:35:58 GMT 2011: Merging all maps ... /data/maqgene/bin/maq mapmerge 2393555056.map 2393555056.00000.map # Thu Feb 3 15:35:58 GMT 2011: Merging all *.unmapped files ... cat 2393555056.00000.unmapped > 2393555056_unmapped.txt # Thu Feb 3 15:35:58 GMT 2011: Linking backend file 2393555056_unmapped.txt to /data/maqgene/out/example_user/ot340/ot340_unmapped.txt ln -fs /data/maqgene/work/2393555056_unmapped.txt /data/maqgene/out/example_user/ot340/ot340_unmapped.txt /data/maqgene/makefile:126: warning: undefined variable `mapcheck_parameters' # Thu Feb 3 15:36:00 GMT 2011: Generating consensus ... /data/maqgene/bin/maq assemble -N 2 -q 0 -r 0.0 -m 7 -Q 100 -p 1192972776.cns \ /data/maqgene/genomes/elegans.bfa 2393555056.map 2> 1192972776_log.txt # Thu Feb 3 15:36:00 GMT 2011: Running 'mapcheck' ... /data/maqgene/bin/maq mapcheck /data/maqgene/genomes/elegans.bfa 2393555056.map > 2393555056_check.txt \ 2>/dev/null # Thu Feb 3 15:36:00 GMT 2011: Creating pileup ... /data/maqgene/bin/maq pileup -Q 100 -p -m 7 -q 0 /data/maqgene/genomes/elegans.bfa 2393555056.map > 1678059181_pileup.txt # Thu Feb 3 15:36:02 GMT 2011: Linking backend file 2393555056_check.txt to /data/maqgene/out/example_user/ot340/ot340_check.txt ln -fs /data/maqgene/work/2393555056_check.txt /data/maqgene/out/example_user/ot340/ot340_check.txt /data/maqgene/bin/filter_matching_lines 1678059181_pileup.txt "%s\t%i\t" \ <(/usr/bin/mysql -Dmaqgenedb -umaqgene --skip-column-names --batch --raw --local-infile -e "select distinct dna, start + 1 from elegans_features where feature = 'SNP' order by dna, start") "%s\t%i\n" si \ | /data/maqgene/bin/filter_maq_pileup 'AaCcGgTt,.' 0 \ > 1678059181_known_snps # Thu Feb 3 15:36:31 GMT 2011: Getting uncovered regions ... # Thu Feb 3 15:36:31 GMT 2011: Making coverage histogram ... cut -f 1,2,4 1678059181_pileup.txt \ | /data/maqgene/bin/get_uncovered_regions 50 \ > 1678059181_uncovered.txt (echo -en "sequencing_depth\tnumber_of_bases\n"; \ cut -f 4 1678059181_pileup.txt \ | /data/maqgene/bin/pileup_histogram 100) > 1678059181_coverage.txt ln -fs /data/maqgene/work/1678059181_pileup.txt /data/maqgene/out/example_user/ot340/ot340_pileup.txt # Thu Feb 3 15:36:41 GMT 2011: Linking backend file 1192972776_log.txt to /data/maqgene/out/example_user/ot340/ot340_log.txt ln -fs /data/maqgene/work/1192972776_log.txt /data/maqgene/out/example_user/ot340/ot340_log.txt # Thu Feb 3 15:36:42 GMT 2011: Linking backend file 1678059181_coverage.txt to /data/maqgene/out/example_user/ot340/ot340_coverage.txt ln -fs /data/maqgene/work/1678059181_coverage.txt /data/maqgene/out/example_user/ot340/ot340_coverage.txt # Thu Feb 3 15:36:55 GMT 2011: Filtering and loading pileup for analysis ... # Thu Feb 3 15:36:55 GMT 2011: Extracting point mutants from consensus ... /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "drop table if exists 1192972776_snps; create table 1192972776_snps ( id int(10) NOT NULL auto_increment, dna char(40) NOT NULL, start int(10) NOT NULL, end int(10) NOT NULL, indel_size int(5) NOT NULL, variant_type enum('indel', 'point') NOT NULL, ref_base char NOT NULL, sample_base char NOT NULL, snp_score int(5) NOT NULL, read_depth int(5) NOT NULL, loci_multiplicity double(5,2) NOT NULL, mapping_quality int(5) NOT NULL, neighbor_quality int(5) NOT NULL, primary key (id), unique key (dna, start), key (indel_size) )" /data/maqgene/bin/maq cns2snp 1192972776.cns \ | cat -b | /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "load data local infile '/dev/stdin' into table 1192972776_snps (id, dna, @start, ref_base, sample_base, snp_score, read_depth, loci_multiplicity, mapping_quality, neighbor_quality) set start = @start - 1, end = @start, indel_size = 0, variant_type = 'point'; flush table 1192972776_snps;" cat 1678059181_pileup.txt \ | /data/maqgene/bin/filter_maq_pileup 'AaCcGgTt,.' 2 \ | awk 'BEGIN { OFS="\t" } { $5=substr($5,1,100); print $0 }' \ | /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "load data local infile '/dev/stdin' into table 1678059181_pileup (dna, @start, ref_base, read_depth, sample_reads, A_fwd, A_rev, C_fwd, C_rev, G_fwd, G_rev, T_fwd, T_rev, wt_fwd, wt_rev) set start = @start - 1; flush table 1678059181_pileup" # Thu Feb 3 15:36:58 GMT 2011: Linking backend file 1678059181_uncovered.txt to /data/maqgene/out/example_user/ot340/ot340_uncovered.txt ln -fs /data/maqgene/work/1678059181_uncovered.txt /data/maqgene/out/example_user/ot340/ot340_uncovered.txt # Thu Feb 3 15:37:06 GMT 2011: Extracting indels from consensus ... /data/maqgene/bin/maq indelsoa /data/maqgene/genomes/elegans.bfa 2393555056.map \ | awk '{ if ($5+$6-$4 >= 5 && $4 <= 1) { print $0 } }' \ | /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "load data local infile '/dev/stdin' into table 1192972776_snps (dna, @start, indel_size, @num_spanning_reads, @num_left_reads, @num_right_reads, @junk) set id = NULL, start = @start - 1, end = @start - 1 + if (indel_size > 0, 0, -indel_size), ref_base = 'X', sample_base = 'X', variant_type = 'indel', snp_score = -1000, read_depth = if (@num_left_reads < @num_right_reads, @num_left_reads, @num_right_reads), loci_multiplicity = -1000, mapping_quality = -1000, neighbor_quality = -1000; flush table 1192972776_snps;" # Thu Feb 3 15:37:07 GMT 2011: Adding placeholders for known SNPs. cat 1678059181_known_snps \ | /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "load data local infile '/dev/stdin' ignore into table 1192972776_snps (dna, @start, ref_base, read_depth, @junk) set id = NULL, start = @start - 1, end = @start - 1, variant_type = 'point', snp_score = -1000, loci_multiplicity = -1000, mapping_quality = -1000, neighbor_quality = -1000; flush table 1192972776_snps;" Found 167350 variants. touch 1192972776_snps # Thu Feb 3 15:37:09 GMT 2011: Loading uncovered regions into table ... # Thu Feb 3 15:37:09 GMT 2011: Getting masking regions cat 1678059181_uncovered.txt \ | /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "drop table if exists 1678059181_uncovered; create table 1678059181_uncovered ( id int(10) NOT NULL auto_increment, dna char(40) NOT NULL, start int(10) NOT NULL, end int(10) NOT NULL, primary key (id) ) auto_increment = 167351; load data local infile '/dev/stdin' into table 1678059181_uncovered (dna, start, end)" # Thu Feb 3 15:37:09 GMT 2011: Finding all genomic features overlapping variants... /data/maqgene/bin/associate_regions -b -1 -c -1000000 -m 2 -o >(/usr/bin/mysql -Dmaqgenedb -umaqgene --skip-column-names --batch --raw --local-infile -e "drop table if exists 1192972776_masked_ids; create table 1192972776_masked_ids (query_region_id int(10) NOT NULL, primary key (query_region_id)); load data local infile '/dev/stdin' ignore into table 1192972776_masked_ids (@j1, query_region_id, @j3, @j4, @j5, @j6, @j7)") -d /data/maqgene/genomes/elegans.dnas -p /data/maqgene/genomes -q <(/usr/bin/mysql -Dmaqgenedb -umaqgene --skip-column-names --batch --raw --local-infile -e "select id, 'elegans', dna, start, end, '+' from 1192972776_snps") -t <(/usr/bin/mysql -Dmaqgenedb -umaqgene --skip-column-names --batch --raw --local-infile -e "select id,'elegans',dna,start,end,strand from elegans_features feature where feature.feature not in ('mRNA', 'intron')") 1>/dev/null; /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "drop table if exists 1192972776_rel_snps; CREATE TABLE 1192972776_rel_snps ( association_id INT(10) NOT NULL, query_region_id INT(10) unsigned NOT NULL, target_region_id INT(10) NOT NULL, distance INT(10) NOT NULL, overlap INT(10) NOT NULL, same_strand enum('SAME', 'OPP') NOT NULL, num_regions_between INT(3) NOT NULL, PRIMARY KEY (association_id), KEY (query_region_id), KEY (target_region_id) )" /data/maqgene/bin/associate_regions -b -1 -c -1000000 -m 0 -o >(/usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "load data local infile '/dev/stdin' into table 1192972776_rel_snps; flush table 1192972776_rel_snps") -d /data/maqgene/genomes/elegans.dnas -p /data/maqgene/genomes -q <(/usr/bin/mysql -Dmaqgenedb -umaqgene --skip-column-names --batch --raw --local-infile -e "select id, 'elegans', dna, start, end, '+' from 1192972776_snps") -t <(/usr/bin/mysql -Dmaqgenedb -umaqgene --skip-column-names --batch --raw --local-infile -e "select id, 'elegans', dna,start,end,strand from elegans_features where feature in ('SNP','exon','five_prime_UTR','intron','mRNA','three_prime_UTR','ncRNA')") 1>/dev/null Uncovered region statistics: chromosome number_uncovered_regions total_uncovered_length I 623 14033486 II 462 15247806 III 414 13754030 IV 637 17443341 V 576 20885477 X 370 17687063 touch 1678059181_uncovered # Thu Feb 3 15:37:09 GMT 2011: Finding mRNA features overlapping uncovered regions ... /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "drop table if exists 1678059181_uncovered_rel; CREATE TABLE 1678059181_uncovered_rel ( association_id INT(10) NOT NULL, query_region_id INT(10) unsigned NOT NULL, target_region_id INT(10) NOT NULL, distance INT(10) NOT NULL, overlap INT(10) NOT NULL, same_strand enum('SAME', 'OPP') NOT NULL, num_regions_between INT(3) NOT NULL, PRIMARY KEY (association_id), KEY (query_region_id), KEY (target_region_id) )" /data/maqgene/bin/associate_regions -b -1 -c -1000000 -m 0 -o >(/usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "load data local infile '/dev/stdin' into table 1678059181_uncovered_rel; flush table 1678059181_uncovered_rel") -d /data/maqgene/genomes/elegans.dnas -p /data/maqgene/genomes -q <(/usr/bin/mysql -Dmaqgenedb -umaqgene --skip-column-names --batch --raw --local-infile -e "select id, 'elegans', dna, start, end, '+' from 1678059181_uncovered") -t <(/usr/bin/mysql -Dmaqgenedb -umaqgene --skip-column-names --batch --raw --local-infile -e "select id, 'elegans', dna,start,end,strand from elegans_features where feature in ('SNP','exon','five_prime_UTR','intron','mRNA','three_prime_UTR','ncRNA')") 1>/dev/null touch 1192972776_masked_ids touch 1192972776_rel_snps # Thu Feb 3 15:37:12 GMT 2011: Calculating translation start offsets... /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "drop table if exists 1192972776_offsets_snps; create table 1192972776_offsets_snps ( id int(10) NOT NULL, gene varchar(50) NOT NULL, 5prime_offset int(10) NOT NULL, 3prime_offset int(10) NOT NULL, total_length int(10) NOT NULL, boundary_type char(20) NOT NULL, primary key (id, gene, boundary_type) );" # Thu Feb 3 15:37:12 GMT 2011: Getting intergenic regions /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "drop table if exists 1192972776_rel_intergenic; create table 1192972776_rel_intergenic like 1192972776_rel_snps"; /data/maqgene/bin/associate_regions -b 50000 -c -1000000 -m 2 -o >(/usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "load data local infile '/dev/stdin' into table 1192972776_rel_intergenic; flush table 1192972776_rel_intergenic") -d /data/maqgene/genomes/elegans.dnas -p /data/maqgene/genomes -q <(/usr/bin/mysql -Dmaqgenedb -umaqgene --skip-column-names --batch --raw --local-infile -e "select id, 'elegans', dna, start, end, '+' from 1192972776_snps") -t <(/usr/bin/mysql -Dmaqgenedb -umaqgene --skip-column-names --batch --raw --local-infile -e "select id, 'elegans',dna,start,end,strand from elegans_features feature where feature.feature = 'mRNA'") 1>/dev/null; /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "insert into 1192972776_offsets_snps select reg.id, gene.attribute gene, if(exon.strand = '+', if(reg.start < min(exon.start), reg.start - min(exon.start), sum(if(reg.start < exon.end, reg.start, exon.end) - if(reg.start < exon.start, reg.start, exon.start))), if(max(exon.end) < reg.start, max(exon.end) - reg.start, sum(if(exon.end < reg.start, reg.start, exon.end) - if(exon.start < reg.start, reg.start, exon.start))) ), if(exon.strand = '+', if(max(exon.end) < reg.start, max(exon.end) - reg.start, sum(if(exon.end < reg.start, reg.start, exon.end) - if(exon.start < reg.start, reg.start, exon.start))), if(reg.start < min(exon.start), reg.start - min(exon.start), sum(if(reg.start < exon.end, reg.start, exon.end) - if(reg.start < exon.start, reg.start, exon.start)))), sum(exon.end - exon.start) total_length, 'start' boundary_type from 1192972776_snps reg join 1192972776_rel_snps rel on (reg.id = rel.query_region_id) join elegans_features gene on (rel.target_region_id = gene.id) join elegans_features exon on (exon.attribute = gene.attribute) where exon.feature = 'exon' and gene.feature = 'mRNA' group by reg.id, gene.attribute" # Thu Feb 3 15:37:16 GMT 2011: Filtering masked intergenic regions /usr/bin/mysql -Dmaqgenedb -umaqgene --skip-column-names --batch --raw --local-infile -e "delete i.* from 1192972776_rel_intergenic i join 1192972776_masked_ids m using (query_region_id)" touch 1678059181_uncovered_rel # Thu Feb 3 15:37:17 GMT 2011: Calculating translation start offsets for uncovered regions... /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "drop table if exists 1678059181_offsets_uncovered; create table 1678059181_offsets_uncovered ( id int(10) NOT NULL, gene varchar(50) NOT NULL, 5prime_offset int(10) NOT NULL, 3prime_offset int(10) NOT NULL, total_length int(10) NOT NULL, boundary_type char(20) NOT NULL, primary key (id, gene, boundary_type) );" /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "insert into 1678059181_offsets_uncovered select reg.id, gene.attribute gene, if(exon.strand = '+', if(reg.start < min(exon.start), reg.start - min(exon.start), sum(if(reg.start < exon.end, reg.start, exon.end) - if(reg.start < exon.start, reg.start, exon.start))), if(max(exon.end) < reg.start, max(exon.end) - reg.start, sum(if(exon.end < reg.start, reg.start, exon.end) - if(exon.start < reg.start, reg.start, exon.start))) ), if(exon.strand = '+', if(max(exon.end) < reg.start, max(exon.end) - reg.start, sum(if(exon.end < reg.start, reg.start, exon.end) - if(exon.start < reg.start, reg.start, exon.start))), if(reg.start < min(exon.start), reg.start - min(exon.start), sum(if(reg.start < exon.end, reg.start, exon.end) - if(reg.start < exon.start, reg.start, exon.start)))), sum(exon.end - exon.start) total_length, 'start' boundary_type from 1678059181_uncovered reg join 1678059181_uncovered_rel rel on (reg.id = rel.query_region_id) join elegans_features gene on (rel.target_region_id = gene.id) join elegans_features exon on (exon.attribute = gene.attribute) where exon.feature = 'exon' and gene.feature = 'mRNA' group by reg.id, gene.attribute" /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "insert into 1678059181_offsets_uncovered select reg.id, gene.attribute gene, if(exon.strand = '+', if(reg.end < min(exon.start), reg.end - min(exon.start), sum(if(reg.end < exon.end, reg.end, exon.end) - if(reg.end < exon.start, reg.end, exon.start))), if(max(exon.end) < reg.end, max(exon.end) - reg.end, sum(if(exon.end < reg.end, reg.end, exon.end) - if(exon.start < reg.end, reg.end, exon.start))) ), if(exon.strand = '+', if(max(exon.end) < reg.end, max(exon.end) - reg.end, sum(if(exon.end < reg.end, reg.end, exon.end) - if(exon.start < reg.end, reg.end, exon.start))), if(reg.end < min(exon.start), reg.end - min(exon.start), sum(if(reg.end < exon.end, reg.end, exon.end) - if(reg.end < exon.start, reg.end, exon.start)))), sum(exon.end - exon.start) total_length, 'end' boundary_type from 1678059181_uncovered reg join 1678059181_uncovered_rel rel on (reg.id = rel.query_region_id) join elegans_features gene on (rel.target_region_id = gene.id) join elegans_features exon on (exon.attribute = gene.attribute) where exon.feature = 'exon' and gene.feature = 'mRNA' group by reg.id, gene.attribute" touch 1678059181_offsets_uncovered touch 1192972776_rel_intergenic # Thu Feb 3 15:37:21 GMT 2011: Classifying intergenic relations /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "drop table if exists 1192972776_intergenic_assoc; create table 1192972776_intergenic_assoc ( id INT(10) NOT NULL, target_region_id INT(10) NOT NULL, relation enum ('upstream', 'downstream', 'into') NOT NULL, distance INT(10) NOT NULL, num_regions_between INT(3) NOT NULL, attribute varchar(50) NOT NULL, primary key (id, target_region_id), key (id, attribute) ); insert into 1192972776_intergenic_assoc select snp.id, rel.target_region_id, if (rel.overlap > 0, 'into', if (feature.strand = '+', if (snp.end < feature.start, 'upstream', 'downstream'), if (feature.end < snp.start, 'upstream', 'downstream') ) ) relation, rel.distance, rel.num_regions_between, feature.attribute from 1192972776_snps snp join 1192972776_rel_intergenic rel on (snp.id = rel.query_region_id) join elegans_features feature on (rel.target_region_id = feature.id)" cat 1678059181_known_snps \ | awk 'BEGIN { OFS="\t" } { $5=substr($5,1,100); print $0 }' \ | /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "load data local infile '/dev/stdin' into table 1678059181_pileup (dna, @start, ref_base, read_depth, sample_reads, A_fwd, A_rev, C_fwd, C_rev, G_fwd, G_rev, T_fwd, T_rev, wt_fwd, wt_rev) set start = @start - 1; flush table 1678059181_pileup" touch 1678059181_pileup # Thu Feb 3 15:37:30 GMT 2011: Writing snp read counts ... /usr/bin/mysql -Dmaqgenedb -umaqgene --skip-column-names --batch --raw --local-infile --column-names -e "select f.id, f.dna, f.start + 1 start, e.snp_name, e.snp_base, if(e.snp_base = 'a', p.A_fwd + p.A_rev, if (e.snp_base = 'c', p.C_fwd + p.C_rev, if (e.snp_base = 'g', p.G_fwd + p.G_rev, if (e.snp_base = 't', p.T_fwd + p.T_rev, -1)))) snp_reads, p.ref_base, p.wt_fwd + p.wt_rev wt_reads, p.read_depth, p.sample_reads from elegans_features f join elegans_snp_changes e on (f.attribute = e.snp_name) join 1678059181_pileup p using (dna,start)" > /data/maqgene/out/example_user/ot340/ot340_snp_read_counts.txt 0 snp read count lines written. touch 1192972776_intergenic_assoc touch 1192972776_offsets_snps # Thu Feb 3 15:37:32 GMT 2011: Getting codons affected by point mutations... /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "drop table if exists 1192972776_codons; create table 1192972776_codons ( id int(10) NOT NULL, gene varchar(50) NOT NULL, ref_codon char(4) NOT NULL, sam_codon char(4) NOT NULL, start char(1) NOT NULL, primary key (id, gene) ); insert into 1192972776_codons select snp.id, off.gene, @ref_codon := substr(gene_dna.sequence, (floor(if(feature.strand = '+', off.5prime_offset, off.5prime_offset - 1 ) / 3 ) * 3 ) + 1, 3 ) ref_codon, insert(@ref_codon, (if(feature.strand = '+', off.5prime_offset, off.5prime_offset - 1) % 3) + 1, 1, if(feature.strand = '+', snp.sample_base, substr('ACGTTGCA', instr('ACGTTGCA', snp.sample_base)+4, 1) ) ) sam_codon, if((feature.strand = '+' && off.5prime_offset < 3) || (feature.strand = '-' && off.5prime_offset - 1 < 3), 'Y', 'N') start from 1192972776_offsets_snps off join 1192972776_snps snp on (off.id = snp.id) join elegans_coding_dna gene_dna using (gene) join elegans_features feature on (gene_dna.gene = feature.attribute and feature.start <= snp.start and feature.end >= snp.end) where snp.variant_type = 'point' and feature.feature = 'exon' and off.boundary_type = 'start' and off.5prime_offset >= 0 and off.3prime_offset >= 0;" touch 1192972776_codons /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "drop table if exists 1192972776_marked; create table 1192972776_marked ( id int(10) NOT NULL, class varchar(100) NOT NULL, description varchar(100) NOT NULL, parent_feature varchar(100) NOT NULL, primary key (id, class, parent_feature) );" # Thu Feb 3 15:37:39 GMT 2011: Finding exonic indels ... /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "insert into 1192972776_marked select snp.id, if (snp.indel_size < 0, if (rel.overlap % 3 = 0, 'inframe', 'frameshift'), if (snp.indel_size % 3 = 0, 'inframe', 'frameshift')) class, if (rel.overlap != snp.end - snp.start, 'exon_boundary', 'none') description, feature.attribute parent_feature from 1192972776_snps snp join 1192972776_rel_snps rel on (snp.id = rel.query_region_id) join elegans_features feature on (rel.target_region_id = feature.id) where feature.feature = 'exon' and snp.variant_type = 'indel';" # Thu Feb 3 15:37:40 GMT 2011: Finding coding variants ... /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "insert into 1192972776_marked select trans.id, if (reference_codons.amino = sample_codons.amino, 'silent', if (reference_codons.amino3 = 'stop', 'readthrough', if (sample_codons.amino3 = 'stop', 'premature_stop', if (trans.start = 'Y' and sample_codons.is_start != 'Y', 'non_start', 'missense') ) ) ) class, concat(ref_codon,'->',sam_codon,'[', reference_codons.amino3,'->',sample_codons.amino3,']') description, trans.gene from 1192972776_codons trans join elegans_genetic_code reference_codons on (trans.ref_codon = reference_codons.codon) join elegans_genetic_code sample_codons on (trans.sam_codon = sample_codons.codon);" # Thu Feb 3 15:37:40 GMT 2011: Finding noncoding variants ... /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "insert into 1192972776_marked select snp.id, feature.feature class, 'none' description, feature.attribute parent_feature from 1192972776_snps snp join 1192972776_rel_snps rel on (snp.id = rel.query_region_id) join elegans_features feature on (rel.target_region_id = feature.id) where feature.feature not in ('exon', 'intron', 'mRNA');" # Thu Feb 3 15:37:40 GMT 2011: Finding splice site variants ... /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "insert into 1192972776_marked select snp.id, if (feature.strand = '+', if (snp.start - feature.start < 2, 'splice_donor', 'splice_acceptor'), if (snp.start - feature.start < 2, 'splice_acceptor', 'splice_donor')) class, 'none' description, feature.attribute parent_feature from 1192972776_snps snp join 1192972776_rel_snps rel on (snp.id = rel.query_region_id) join elegans_features feature on (rel.target_region_id = feature.id) where feature.feature = 'intron' and rel.distance > -2;" # Thu Feb 3 15:37:40 GMT 2011: Finding intergenic and intronic variants ... /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "insert into 1192972776_marked select id, 'nongenic' class, concat(if (relation in ('upstream','into'), - distance, distance),' ',relation) description, attribute from 1192972776_intergenic_assoc where ((relation = 'upstream' and num_regions_between = 0 and distance < 50000) or (relation = 'upstream' and num_regions_between < 2 and distance < 1000) or (relation = 'downstream' and num_regions_between = 0 and distance < 1000) or (relation = 'downstream' and num_regions_between < 2 and distance < 1000) or relation = 'into' );" # Thu Feb 3 15:37:43 GMT 2011: Finding uncovered regions in genes ... /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "insert into 1192972776_marked select b.id, 'uncovered', if(b.5prime_offset = e.5prime_offset, concat('non-exonic, ',unc.start - gene.start,' to ',unc.end - gene.start,' bp into'), concat( if(b.5prime_offset < 0, concat(-b.5prime_offset, ' bp upstream'), if(b.3prime_offset < 0, concat(-b.3prime_offset, ' bp downstream'), concat('codon ',floor(b.5prime_offset / 3) + 1) ) ), ' to ', if(e.3prime_offset < 0, concat(-e.3prime_offset, ' bp downstream'), if(e.5prime_offset < 0, concat(-e.5prime_offset, ' bp upstream'), concat('codon ',ceiling(e.5prime_offset / 3) + 1) ) ), ' (', format(100 * (if(e.5prime_offset < 0,0,e.5prime_offset) - (if(b.5prime_offset < 0,0,b.5prime_offset)) ) / b.total_length,0 ), ' % of ', floor(b.total_length/3), ' codons)' ) ) description, b.gene from 1678059181_uncovered unc join 1678059181_offsets_uncovered b using (id) join 1678059181_offsets_uncovered e using (id, gene) join elegans_features gene on (e.gene = gene.attribute) where b.boundary_type = 'start' and e.boundary_type = 'end' and gene.strand = '+' and gene.feature = 'mRNA' union select b.id, 'uncovered', if(b.5prime_offset = e.5prime_offset, concat('non-exonic, ',gene.end - unc.end,' to ',gene.end - unc.start,' bp into'), concat( if(e.5prime_offset < 0, concat(-e.5prime_offset, ' bp upstream'), if(e.3prime_offset < 0, concat(-e.3prime_offset, ' bp downstream'), concat('codon ',floor(e.5prime_offset / 3) + 1) ) ), ' to ', if(b.3prime_offset < 0, concat(-b.3prime_offset, ' bp downstream'), if(b.5prime_offset < 0, concat(-b.5prime_offset, ' bp upstream'), concat('codon ',ceiling(b.5prime_offset / 3) + 1) ) ), ' (', format(100 * (if(b.5prime_offset < 0,0,b.5prime_offset) - (if(e.5prime_offset < 0,0,e.5prime_offset)) ) / b.total_length,0 ), ' % of ', floor(b.total_length/3), ' codons)' ) ) description, b.gene from 1678059181_uncovered unc join 1678059181_offsets_uncovered b using (id) join 1678059181_offsets_uncovered e using (id, gene) join elegans_features gene on (e.gene = gene.attribute) where b.boundary_type = 'start' and e.boundary_type = 'end' and gene.strand = '-' and gene.feature = 'mRNA'" # Thu Feb 3 15:37:43 GMT 2011: Variants found: +-----------------+-----------------+ | class | number_variants | +-----------------+-----------------+ | five_prime_UTR | 1249 | | missense | 6346 | | ncRNA | 875 | | nongenic | 130751 | | non_start | 10 | | premature_stop | 704 | | readthrough | 21 | | silent | 7492 | | SNP | 485 | | splice_acceptor | 49 | | splice_donor | 45 | | three_prime_UTR | 3536 | | uncovered | 1654 | +-----------------+-----------------+ touch 1192972776_marked # Thu Feb 3 15:37:44 GMT 2011: Combining results ... /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "drop table if exists 1192972776_combined; create table 1192972776_combined (primary key (id, class, description), key (dna, start)) as select snp.id, 'ot340' mutant_strain, snp.dna, snp.start + 1 start, snp.end - snp.start length, snp.ref_base reference_base, snp.sample_base sample_base, snp.snp_score consensus_score, snp.loci_multiplicity, snp.mapping_quality, snp.neighbor_quality, (pile.wt_fwd + pile.wt_rev) number_wildtype_reads, (snp.read_depth - (pile.wt_fwd + pile.wt_rev)) number_variant_reads, snp.read_depth sequencing_depth, pile.sample_reads, snp.variant_type, snp.indel_size, class.class, class.description, group_concat(class.parent_feature order by class.parent_feature) parent_features from 1192972776_snps snp join 1678059181_pileup pile using (dna,start) join 1192972776_marked class on (snp.id = class.id) left join elegans_per_locus using (dna,start) where (snp.variant_type = 'point' and (snp.snp_score >= 3 and snp.loci_multiplicity <= 10 and snp.neighbor_quality >= 0 and pile.read_depth >= 4 and ((pile.read_depth - pile.wt_fwd - pile.wt_rev) / pile.read_depth) >= 0.5) or snp.variant_type = 'indel' ) group by id, class, description union select unc.id id, 'ot340' mutant_strain, unc.dna, unc.start + 1 start, unc.end - unc.start length, '-' reference_base, '-' sample_base, 0 consensus_score, 0, 0, 0, 0 number_wildtype_reads, 0 number_variant_reads, 0 sequencing_depth, '' sample_reads, 'uncovered', 0, class.class, class.description, group_concat(class.parent_feature order by class.parent_feature) parent_features from 1678059181_uncovered unc join 1192972776_marked class on (unc.id = class.id) left join elegans_per_locus using (dna,start) group by id, class, description; alter table 1192972776_combined order by dna, start, length" touch 1192972776_combined # Thu Feb 3 15:37:45 GMT 2011: Writing results to grouped file ... /usr/bin/mysql -Dmaqgenedb -umaqgene --skip-column-names --batch --raw --local-infile --column-names -e "select id, mutant_strain, dna, start, length, reference_base, sample_base, consensus_score, loci_multiplicity, mapping_quality, neighbor_quality, number_wildtype_reads, number_variant_reads, sequencing_depth, sample_reads, variant_type, indel_size, group_concat(class) classes, group_concat(description) descriptions, group_concat(concat('{',parent_features,'}')) parent_features from 1192972776_combined group by id order by dna, start;" > /data/maqgene/out/example_user/ot340/ot340_grouped.txt # Thu Feb 3 15:37:45 GMT 2011: Writing results to flat file ... /usr/bin/mysql -Dmaqgenedb -umaqgene --skip-column-names --batch --raw --local-infile --column-names -e "select * from 1192972776_combined" > /data/maqgene/out/example_user/ot340/ot340_flat.txt 2071 lines written. 1731 lines written. rm 3039536873_split1 2393555056.00000.1.bfq 2393555056.00000.2.bfq 2393555056.00000.mismatch 3039536873_split2 2393555056.00000.1.fastq 2393555056.00000.unmapped 2393555056.00000.2.fastq 2393555056.00000.map make: Leaving directory `/data/maqgene/work' make: Entering directory `/data/maqgene/work' # Thu Feb 3 15:37:45 GMT 2011: Cleaning intermediate data ... /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "drop table if exists 1192972776_snps,1192972776_rel,1192972776_masked_ids,1192972776_rel_intergenic,1192972776_intergenic_assoc,1192972776_offsets,1192972776_codons,1192972776_marked,1192972776_combined,1678059181_pileup" rm -f 1192972776_snps 1192972776_rel 1192972776_masked_ids 1192972776_rel_intergenic 1192972776_intergenic_assoc 1192972776_offsets 1192972776_codons 1192972776_marked 1192972776_combined 1678059181_pileup make: Leaving directory `/data/maqgene/work'