Finished reading input. ++ echo CB7031/./s_1_1_sequence_read1.fastq ++ tr , ' ' + fastq_file_set1=CB7031/./s_1_1_sequence_read1.fastq ++ cd /data/maqgene/reads ++ wc -l ++ cat CB7031/./s_1_1_sequence_read1.fastq ++ cut -f 1 -d ' ' + TOTAL_LINES=101930508 ++ echo '(101930508 / 5000000) + (101930508 % 5000000 != 0)' ++ bc + num_chunks=21 + set +x make -j 14 -l 14 -C /data/maqgene/work --warn-undefined-variables -I /data/maqgene -f /data/maqgene/makefile BFA_FILE=elegans.bfa MAQGENE_GENOME_DIR=/data/maqgene/genomes reads_cksum=3069955231 map_cksum=519659138 pileup_cksum=3320924337 cns_cksum=306782340 umdd=50000 dmdd=1000 umid=1000 dmid=1000 max_gene_radius=50000 full_results_dir=/data/maqgene/out/example_user/CB7031 outfile_basename=CB7031 map_parameters=" -m 0.00001 -C 250 -n 2 -2 0 -a 500 -e 100 -A 500 -1 0" assemble_parameters=" -N 2 -q 0 -r 0.0 -m 7 -Q 100 -p " pileup_parameters=" -Q 100 -p -m 7 -q 0" CHUNK_SIZE=5000000 NUM_CHUNKS=21 Will process 101930508 lines of input in 21 chunks make: Entering directory `/data/maqgene/work' # Mon Sep 17 22:32:24 BST 2012: Removing frontend files in case this is a duplicate run ... rm -f /data/maqgene/out/example_user/CB7031/CB7031_{grouped.txt,flat.txt} rm -f /data/maqgene/out/example_user/CB7031/CB7031_uncovered.txt /data/maqgene/out/example_user/CB7031/CB7031_coverage.txt /data/maqgene/out/example_user/CB7031/CB7031_pileup.txt /data/maqgene/out/example_user/CB7031/CB7031_log.txt /data/maqgene/out/example_user/CB7031/CB7031_check.txt /data/maqgene/out/example_user/CB7031/CB7031_unmapped.txt make: Leaving directory `/data/maqgene/work' make: Entering directory `/data/maqgene/work' /data/maqgene/makefile:76: warning: undefined variable `date' /data/maqgene/makefile:78: warning: undefined variable `date' # : Regrouping fastq reads into chunks of size 5000000. split -l 5000000 -a 5 -d <(cat /data/maqgene/reads/CB7031/./s_1_1_sequence_read1.fastq) 519659138.1.fastq. # : Regrouping fastq reads into chunks of size 5000000. split -l 5000000 -a 5 -d <(cat /data/maqgene/reads/CB7031/./s_1_2_sequence_read2.fastq) 519659138.2.fastq. for stem in 00000 00001 00002 00003 00004 00005 00006 00007 00008 00009 00010 00011 00012 00013 00014 00015 00016 00017 00018 00019 00020; do mv 519659138.1.fastq.$stem 519659138.$stem.1.fastq; done touch 3069955231_split1 for stem in 00000 00001 00002 00003 00004 00005 00006 00007 00008 00009 00010 00011 00012 00013 00014 00015 00016 00017 00018 00019 00020; do mv 519659138.2.fastq.$stem 519659138.$stem.2.fastq; done touch 3069955231_split2 # Mon Sep 17 22:32:49 BST 2012: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 519659138.00000.1.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 519659138.00000.1.bfq # Mon Sep 17 22:32:49 BST 2012: Converting fastq files to bfq ... # Mon Sep 17 22:32:49 BST 2012: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 519659138.00001.1.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 519659138.00001.1.bfq # Mon Sep 17 22:32:49 BST 2012: Converting fastq files to bfq ... ut | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 519659138.00002.1.bfq # Mon Sep 17 22:32:49 BST 2012: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 519659138.00003.1.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 519659138.00003.1.bfq /data/maqgene/bin/maq sol2sanger 519659138.00004.1.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 519659138.00004.1.bfq # Mon Sep 17 22:32:49 BST 2012: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 519659138.00005.1.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 519659138.00005.1.bfq # Mon Sep 17 22:32:49 BST 2012: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 519659138.00006.1.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 519659138.00006.1.bfq # Mon Sep 17 22:32:49 BST 2012: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 519659138.00007.1.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 519659138.00007.1.bfq # Mon Sep 17 22:32:49 BST 2012: Converting fastq files to bfq ... # Mon Sep 17 22:32:49 BST 2012: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 519659138.00008.1.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 519659138.00008.1.bfq # Mon Sep 17 22:32:49 BST 2012: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 519659138.00009.1.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 519659138.00009.1.bfq /data/maqgene/bin/maq sol2sanger 519659138.00010.1.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 519659138.00010.1.bfq # Mon Sep 17 22:32:49 BST 2012: Converting fastq files to bfq ... # Mon Sep 17 22:32:49 BST 2012: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 519659138.00011.1.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 519659138.00011.1.bfq /data/maqgene/bin/maq sol2sanger 519659138.00012.1.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 519659138.00012.1.bfq # Mon Sep 17 22:32:49 BST 2012: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 519659138.00013.1.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 519659138.00013.1.bfq -- finish writing file '519659138.00003.1.bfq' -- 1250000 sequences were loaded. # Mon Sep 17 22:33:10 BST 2012: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 519659138.00014.1.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 519659138.00014.1.bfq -- finish writing file '519659138.00010.1.bfq' -- 1250000 sequences were loaded. # Mon Sep 17 22:33:15 BST 2012: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 519659138.00015.1.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 519659138.00015.1.bfq -- finish writing file '519659138.00002.1.bfq' -- 1250000 sequences were loaded. # Mon Sep 17 22:33:15 BST 2012: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 519659138.00016.1.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 519659138.00016.1.bfq -- finish writing file '519659138.00011.1.bfq' -- 1250000 sequences were loaded. # Mon Sep 17 22:33:15 BST 2012: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 519659138.00017.1.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 519659138.00017.1.bfq -- finish writing file '519659138.00004.1.bfq' -- 1250000 sequences were loaded. # Mon Sep 17 22:33:15 BST 2012: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 519659138.00018.1.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 519659138.00018.1.bfq -- finish writing file '519659138.00013.1.bfq' -- 1250000 sequences were loaded. # Mon Sep 17 22:33:17 BST 2012: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 519659138.00019.1.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 519659138.00019.1.bfq -- finish writing file '519659138.00007.1.bfq' -- 1250000 sequences were loaded. # Mon Sep 17 22:33:17 BST 2012: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 519659138.00020.1.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 519659138.00020.1.bfq -- finish writing file '519659138.00012.1.bfq' -- 1250000 sequences were loaded. # Mon Sep 17 22:33:17 BST 2012: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 519659138.00000.2.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 519659138.00000.2.bfq -- finish writing file '519659138.00006.1.bfq' -- 1250000 sequences were loaded. # Mon Sep 17 22:33:17 BST 2012: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 519659138.00001.2.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 519659138.00001.2.bfq -- finish writing file '519659138.00001.1.bfq' -- 1250000 sequences were loaded. # Mon Sep 17 22:33:19 BST 2012: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 519659138.00002.2.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 519659138.00002.2.bfq -- finish writing file '519659138.00008.1.bfq' -- 1250000 sequences were loaded. # Mon Sep 17 22:33:19 BST 2012: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 519659138.00003.2.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 519659138.00003.2.bfq -- finish writing file '519659138.00009.1.bfq' -- 1250000 sequences were loaded. # Mon Sep 17 22:33:19 BST 2012: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 519659138.00004.2.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 519659138.00004.2.bfq -- finish writing file '519659138.00000.1.bfq' -- 1250000 sequences were loaded. # Mon Sep 17 22:33:19 BST 2012: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 519659138.00005.2.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 519659138.00005.2.bfq -- finish writing file '519659138.00005.1.bfq' -- 1250000 sequences were loaded. # Mon Sep 17 22:33:20 BST 2012: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 519659138.00006.2.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 519659138.00006.2.bfq -- finish writing file '519659138.00020.1.bfq' -- 482627 sequences were loaded. # Mon Sep 17 22:33:28 BST 2012: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 519659138.00007.2.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 519659138.00007.2.bfq -- finish writing file '519659138.00004.2.bfq' -- 1250000 sequences were loaded. # Mon Sep 17 22:33:36 BST 2012: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 519659138.00008.2.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 519659138.00008.2.bfq -- finish writing file '519659138.00014.1.bfq' -- 1250000 sequences were loaded. # Mon Sep 17 22:33:38 BST 2012: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 519659138.00009.2.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 519659138.00009.2.bfq -- finish writing file '519659138.00018.1.bfq' -- 1250000 sequences were loaded. # Mon Sep 17 22:33:40 BST 2012: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 519659138.00010.2.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 519659138.00010.2.bfq -- finish writing file '519659138.00016.1.bfq' -- 1250000 sequences were loaded. # Mon Sep 17 22:33:41 BST 2012: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 519659138.00011.2.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 519659138.00011.2.bfq -- finish writing file '519659138.00015.1.bfq' -- 1250000 sequences were loaded. # Mon Sep 17 22:33:43 BST 2012: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 519659138.00012.2.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 519659138.00012.2.bfq -- finish writing file '519659138.00006.2.bfq' -- 1250000 sequences were loaded. # Mon Sep 17 22:33:43 BST 2012: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 519659138.00013.2.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 519659138.00013.2.bfq -- finish writing file '519659138.00019.1.bfq' -- 1250000 sequences were loaded. # Mon Sep 17 22:33:45 BST 2012: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 519659138.00014.2.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 519659138.00014.2.bfq -- finish writing file '519659138.00000.2.bfq' -- 1250000 sequences were loaded. # Mon Sep 17 22:33:45 BST 2012: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 519659138.00015.2.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 519659138.00015.2.bfq -- finish writing file '519659138.00017.1.bfq' -- 1250000 sequences were loaded. # Mon Sep 17 22:33:45 BST 2012: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 519659138.00016.2.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 519659138.00016.2.bfq -- finish writing file '519659138.00001.2.bfq' -- 1250000 sequences were loaded. # Mon Sep 17 22:33:46 BST 2012: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 519659138.00017.2.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 519659138.00017.2.bfq -- finish writing file '519659138.00005.2.bfq' -- 1250000 sequences were loaded. # Mon Sep 17 22:33:47 BST 2012: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 519659138.00018.2.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 519659138.00018.2.bfq -- finish writing file '519659138.00002.2.bfq' -- 1250000 sequences were loaded. # Mon Sep 17 22:33:47 BST 2012: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 519659138.00019.2.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 519659138.00019.2.bfq -- finish writing file '519659138.00003.2.bfq' -- 1250000 sequences were loaded. # Mon Sep 17 22:33:48 BST 2012: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 519659138.00020.2.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 519659138.00020.2.bfq -- finish writing file '519659138.00007.2.bfq' -- 1250000 sequences were loaded. # Mon Sep 17 22:33:53 BST 2012: Mapping file(s) 519659138.00000.1.bfq 519659138.00000.2.bfq /data/maqgene/bin/maq map -m 0.00001 -C 250 -n 2 -2 0 -a 500 -e 100 -A 500 -1 0 -u 519659138.00000.unmapped -H 519659138.00000.mismatch \ 519659138.00000.map /data/maqgene/genomes/elegans.bfa 519659138.00000.1.bfq 519659138.00000.2.bfq 2> /dev/null -- finish writing file '519659138.00020.2.bfq' -- 482627 sequences were loaded. # Mon Sep 17 22:33:58 BST 2012: Mapping file(s) 519659138.00001.1.bfq 519659138.00001.2.bfq /data/maqgene/bin/maq map -m 0.00001 -C 250 -n 2 -2 0 -a 500 -e 100 -A 500 -1 0 -u 519659138.00001.unmapped -H 519659138.00001.mismatch \ 519659138.00001.map /data/maqgene/genomes/elegans.bfa 519659138.00001.1.bfq 519659138.00001.2.bfq 2> /dev/null -- finish writing file '519659138.00011.2.bfq' -- 1250000 sequences were loaded. # Mon Sep 17 22:34:00 BST 2012: Mapping file(s) 519659138.00002.1.bfq 519659138.00002.2.bfq /data/maqgene/bin/maq map -m 0.00001 -C 250 -n 2 -2 0 -a 500 -e 100 -A 500 -1 0 -u 519659138.00002.unmapped -H 519659138.00002.mismatch \ 519659138.00002.map /data/maqgene/genomes/elegans.bfa 519659138.00002.1.bfq 519659138.00002.2.bfq 2> /dev/null -- finish writing file '519659138.00008.2.bfq' -- 1250000 sequences were loaded. # Mon Sep 17 22:34:04 BST 2012: Mapping file(s) 519659138.00003.1.bfq 519659138.00003.2.bfq /data/maqgene/bin/maq map -m 0.00001 -C 250 -n 2 -2 0 -a 500 -e 100 -A 500 -1 0 -u 519659138.00003.unmapped -H 519659138.00003.mismatch \ 519659138.00003.map /data/maqgene/genomes/elegans.bfa 519659138.00003.1.bfq 519659138.00003.2.bfq 2> /dev/null -- finish writing file '519659138.00009.2.bfq' -- 1250000 sequences were loaded. # Mon Sep 17 22:34:05 BST 2012: Mapping file(s) 519659138.00004.1.bfq 519659138.00004.2.bfq /data/maqgene/bin/maq map -m 0.00001 -C 250 -n 2 -2 0 -a 500 -e 100 -A 500 -1 0 -u 519659138.00004.unmapped -H 519659138.00004.mismatch \ 519659138.00004.map /data/maqgene/genomes/elegans.bfa 519659138.00004.1.bfq 519659138.00004.2.bfq 2> /dev/null -- finish writing file '519659138.00010.2.bfq' -- 1250000 sequences were loaded. # Mon Sep 17 22:34:08 BST 2012: Mapping file(s) 519659138.00005.1.bfq 519659138.00005.2.bfq /data/maqgene/bin/maq map -m 0.00001 -C 250 -n 2 -2 0 -a 500 -e 100 -A 500 -1 0 -u 519659138.00005.unmapped -H 519659138.00005.mismatch \ 519659138.00005.map /data/maqgene/genomes/elegans.bfa 519659138.00005.1.bfq 519659138.00005.2.bfq 2> /dev/null -- finish writing file '519659138.00013.2.bfq' -- 1250000 sequences were loaded. # Mon Sep 17 22:34:08 BST 2012: Mapping file(s) 519659138.00006.1.bfq 519659138.00006.2.bfq /data/maqgene/bin/maq map -m 0.00001 -C 250 -n 2 -2 0 -a 500 -e 100 -A 500 -1 0 -u 519659138.00006.unmapped -H 519659138.00006.mismatch \ 519659138.00006.map /data/maqgene/genomes/elegans.bfa 519659138.00006.1.bfq 519659138.00006.2.bfq 2> /dev/null -- finish writing file '519659138.00014.2.bfq' -- 1250000 sequences were loaded. # Mon Sep 17 22:34:08 BST 2012: Mapping file(s) 519659138.00007.1.bfq 519659138.00007.2.bfq /data/maqgene/bin/maq map -m 0.00001 -C 250 -n 2 -2 0 -a 500 -e 100 -A 500 -1 0 -u 519659138.00007.unmapped -H 519659138.00007.mismatch \ 519659138.00007.map /data/maqgene/genomes/elegans.bfa 519659138.00007.1.bfq 519659138.00007.2.bfq 2> /dev/null -- finish writing file '519659138.00012.2.bfq' -- 1250000 sequences were loaded. # Mon Sep 17 22:34:09 BST 2012: Mapping file(s) 519659138.00008.1.bfq 519659138.00008.2.bfq /data/maqgene/bin/maq map -m 0.00001 -C 250 -n 2 -2 0 -a 500 -e 100 -A 500 -1 0 -u 519659138.00008.unmapped -H 519659138.00008.mismatch \ 519659138.00008.map /data/maqgene/genomes/elegans.bfa 519659138.00008.1.bfq 519659138.00008.2.bfq 2> /dev/null -- finish writing file '519659138.00016.2.bfq' -- 1250000 sequences were loaded. # Mon Sep 17 22:34:10 BST 2012: Mapping file(s) 519659138.00009.1.bfq 519659138.00009.2.bfq /data/maqgene/bin/maq map -m 0.00001 -C 250 -n 2 -2 0 -a 500 -e 100 -A 500 -1 0 -u 519659138.00009.unmapped -H 519659138.00009.mismatch \ 519659138.00009.map /data/maqgene/genomes/elegans.bfa 519659138.00009.1.bfq 519659138.00009.2.bfq 2> /dev/null -- finish writing file '519659138.00019.2.bfq' -- 1250000 sequences were loaded. # Mon Sep 17 22:34:10 BST 2012: Mapping file(s) 519659138.00010.1.bfq 519659138.00010.2.bfq /data/maqgene/bin/maq map -m 0.00001 -C 250 -n 2 -2 0 -a 500 -e 100 -A 500 -1 0 -u 519659138.00010.unmapped -H 519659138.00010.mismatch \ 519659138.00010.map /data/maqgene/genomes/elegans.bfa 519659138.00010.1.bfq 519659138.00010.2.bfq 2> /dev/null -- finish writing file '519659138.00015.2.bfq' -- 1250000 sequences were loaded. # Mon Sep 17 22:34:11 BST 2012: Mapping file(s) 519659138.00011.1.bfq 519659138.00011.2.bfq /data/maqgene/bin/maq map -m 0.00001 -C 250 -n 2 -2 0 -a 500 -e 100 -A 500 -1 0 -u 519659138.00011.unmapped -H 519659138.00011.mismatch \ 519659138.00011.map /data/maqgene/genomes/elegans.bfa 519659138.00011.1.bfq 519659138.00011.2.bfq 2> /dev/null -- finish writing file '519659138.00017.2.bfq' -- 1250000 sequences were loaded. # Mon Sep 17 22:34:13 BST 2012: Mapping file(s) 519659138.00012.1.bfq 519659138.00012.2.bfq /data/maqgene/bin/maq map -m 0.00001 -C 250 -n 2 -2 0 -a 500 -e 100 -A 500 -1 0 -u 519659138.00012.unmapped -H 519659138.00012.mismatch \ 519659138.00012.map /data/maqgene/genomes/elegans.bfa 519659138.00012.1.bfq 519659138.00012.2.bfq 2> /dev/null -- finish writing file '519659138.00018.2.bfq' -- 1250000 sequences were loaded. # Mon Sep 17 22:34:14 BST 2012: Mapping file(s) 519659138.00013.1.bfq 519659138.00013.2.bfq /data/maqgene/bin/maq map -m 0.00001 -C 250 -n 2 -2 0 -a 500 -e 100 -A 500 -1 0 -u 519659138.00013.unmapped -H 519659138.00013.mismatch \ 519659138.00013.map /data/maqgene/genomes/elegans.bfa 519659138.00013.1.bfq 519659138.00013.2.bfq 2> /dev/null # Mon Sep 17 22:40:09 BST 2012: Mapping file(s) 519659138.00015.1.bfq 519659138.00015.2.bfq /data/maqgene/bin/maq map -m 0.00001 -C 250 -n 2 -2 0 -a 500 -e 100 -A 500 -1 0 -u 519659138.00015.unmapped -H 519659138.00015.mismatch \ 519659138.00015.map /data/maqgene/genomes/elegans.bfa 519659138.00015.1.bfq 519659138.00015.2.bfq 2> /dev/null # Mon Sep 17 22:40:10 BST 2012: Mapping file(s) 519659138.00014.1.bfq 519659138.00014.2.bfq /data/maqgene/bin/maq map -m 0.00001 -C 250 -n 2 -2 0 -a 500 -e 100 -A 500 -1 0 -u 519659138.00014.unmapped -H 519659138.00014.mismatch \ 519659138.00014.map /data/maqgene/genomes/elegans.bfa 519659138.00014.1.bfq 519659138.00014.2.bfq 2> /dev/null # Mon Sep 17 22:40:10 BST 2012: Mapping file(s) 519659138.00016.1.bfq 519659138.00016.2.bfq /data/maqgene/bin/maq map -m 0.00001 -C 250 -n 2 -2 0 -a 500 -e 100 -A 500 -1 0 -u 519659138.00016.unmapped -H 519659138.00016.mismatch \ 519659138.00016.map /data/maqgene/genomes/elegans.bfa 519659138.00016.1.bfq 519659138.00016.2.bfq 2> /dev/null # Mon Sep 17 22:40:11 BST 2012: Mapping file(s) 519659138.00017.1.bfq 519659138.00017.2.bfq /data/maqgene/bin/maq map -m 0.00001 -C 250 -n 2 -2 0 -a 500 -e 100 -A 500 -1 0 -u 519659138.00017.unmapped -H 519659138.00017.mismatch \ 519659138.00017.map /data/maqgene/genomes/elegans.bfa 519659138.00017.1.bfq 519659138.00017.2.bfq 2> /dev/null # Mon Sep 17 22:40:13 BST 2012: Mapping file(s) 519659138.00018.1.bfq 519659138.00018.2.bfq /data/maqgene/bin/maq map -m 0.00001 -C 250 -n 2 -2 0 -a 500 -e 100 -A 500 -1 0 -u 519659138.00018.unmapped -H 519659138.00018.mismatch \ 519659138.00018.map /data/maqgene/genomes/elegans.bfa 519659138.00018.1.bfq 519659138.00018.2.bfq 2> /dev/null # Mon Sep 17 22:40:24 BST 2012: Mapping file(s) 519659138.00020.1.bfq 519659138.00020.2.bfq /data/maqgene/bin/maq map -m 0.00001 -C 250 -n 2 -2 0 -a 500 -e 100 -A 500 -1 0 -u 519659138.00020.unmapped -H 519659138.00020.mismatch \ 519659138.00020.map /data/maqgene/genomes/elegans.bfa 519659138.00020.1.bfq 519659138.00020.2.bfq 2> /dev/null # Mon Sep 17 22:40:27 BST 2012: Mapping file(s) 519659138.00019.1.bfq 519659138.00019.2.bfq /data/maqgene/bin/maq map -m 0.00001 -C 250 -n 2 -2 0 -a 500 -e 100 -A 500 -1 0 -u 519659138.00019.unmapped -H 519659138.00019.mismatch \ 519659138.00019.map /data/maqgene/genomes/elegans.bfa 519659138.00019.1.bfq 519659138.00019.2.bfq 2> /dev/null # Mon Sep 17 22:44:48 BST 2012: Merging all maps ... /data/maqgene/bin/maq mapmerge 519659138.map 519659138.00000.map 519659138.00001.map 519659138.00002.map 519659138.00003.map 519659138.00004.map 519659138.00005.map 519659138.00006.map 519659138.00007.map 519659138.00008.map 519659138.00009.map 519659138.00010.map 519659138.00011.map 519659138.00012.map 519659138.00013.map 519659138.00014.map 519659138.00015.map 519659138.00016.map 519659138.00017.map 519659138.00018.map 519659138.00019.map 519659138.00020.map # Mon Sep 17 22:44:48 BST 2012: Merging all *.unmapped files ... cat 519659138.00000.unmapped 519659138.00001.unmapped 519659138.00002.unmapped 519659138.00003.unmapped 519659138.00004.unmapped 519659138.00005.unmapped 519659138.00006.unmapped 519659138.00007.unmapped 519659138.00008.unmapped 519659138.00009.unmapped 519659138.00010.unmapped 519659138.00011.unmapped 519659138.00012.unmapped 519659138.00013.unmapped 519659138.00014.unmapped 519659138.00015.unmapped 519659138.00016.unmapped 519659138.00017.unmapped 519659138.00018.unmapped 519659138.00019.unmapped 519659138.00020.unmapped > 519659138_unmapped.txt # Mon Sep 17 22:44:49 BST 2012: Linking backend file 519659138_unmapped.txt to /data/maqgene/out/example_user/CB7031/CB7031_unmapped.txt ln -fs /data/maqgene/work/519659138_unmapped.txt /data/maqgene/out/example_user/CB7031/CB7031_unmapped.txt /data/maqgene/makefile:126: warning: undefined variable `mapcheck_parameters' # Mon Sep 17 22:52:57 BST 2012: Generating consensus ... # Mon Sep 17 22:52:57 BST 2012: Creating pileup ... /data/maqgene/bin/maq assemble -N 2 -q 0 -r 0.0 -m 7 -Q 100 -p 306782340.cns \ /data/maqgene/genomes/elegans.bfa 519659138.map 2> 306782340_log.txt # Mon Sep 17 22:52:57 BST 2012: Running 'mapcheck' ... /data/maqgene/bin/maq pileup -Q 100 -p -m 7 -q 0 /data/maqgene/genomes/elegans.bfa 519659138.map > 3320924337_pileup.txt /data/maqgene/bin/maq mapcheck /data/maqgene/genomes/elegans.bfa 519659138.map > 519659138_check.txt \ 2>/dev/null # Mon Sep 17 22:54:48 BST 2012: Linking backend file 519659138_check.txt to /data/maqgene/out/example_user/CB7031/CB7031_check.txt ln -fs /data/maqgene/work/519659138_check.txt /data/maqgene/out/example_user/CB7031/CB7031_check.txt /data/maqgene/bin/filter_matching_lines 3320924337_pileup.txt "%s\t%i\t" \ <(/usr/bin/mysql -Dmaqgenedb -umaqgene --skip-column-names --batch --raw --local-infile -e "select distinct dna, start + 1 from elegans_features where feature = 'SNP' order by dna, start") "%s\t%i\n" si \ | /data/maqgene/bin/filter_maq_pileup 'AaCcGgTt,.' 0 \ > 3320924337_known_snps # Mon Sep 17 22:55:13 BST 2012: Getting uncovered regions ... # Mon Sep 17 22:55:13 BST 2012: Making coverage histogram ... # Mon Sep 17 22:55:13 BST 2012: Linking backend file 3320924337_pileup.txt to /data/maqgene/out/example_user/CB7031/CB7031_pileup.txt cut -f 1,2,4 3320924337_pileup.txt \ | /data/maqgene/bin/get_uncovered_regions 50 \ > 3320924337_uncovered.txt (echo -en "sequencing_depth\tnumber_of_bases\n"; \ cut -f 4 3320924337_pileup.txt \ | /data/maqgene/bin/pileup_histogram 100) > 3320924337_coverage.txt ln -fs /data/maqgene/work/3320924337_pileup.txt /data/maqgene/out/example_user/CB7031/CB7031_pileup.txt # Mon Sep 17 22:55:29 BST 2012: Linking backend file 3320924337_coverage.txt to /data/maqgene/out/example_user/CB7031/CB7031_coverage.txt ln -fs /data/maqgene/work/3320924337_coverage.txt /data/maqgene/out/example_user/CB7031/CB7031_coverage.txt # Mon Sep 17 22:55:39 BST 2012: Linking backend file 3320924337_uncovered.txt to /data/maqgene/out/example_user/CB7031/CB7031_uncovered.txt ln -fs /data/maqgene/work/3320924337_uncovered.txt /data/maqgene/out/example_user/CB7031/CB7031_uncovered.txt # Mon Sep 17 22:55:39 BST 2012: Filtering and loading pileup for analysis ... /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "drop table if exists 3320924337_pileup; create table 3320924337_pileup ( dna enum('I','II','III','IV','MtDNA','V','X') NOT NULL, start int(10) NOT NULL, ref_base char(1) NOT NULL, read_depth int(5) NOT NULL, sample_reads varchar(100) NOT NULL, A_fwd int(5) NOT NULL, A_rev int(5) NOT NULL, C_fwd int(5) NOT NULL, C_rev int(5) NOT NULL, G_fwd int(5) NOT NULL, G_rev int(5) NOT NULL, T_fwd int(5) NOT NULL, T_rev int(5) NOT NULL, wt_fwd int(5) NOT NULL, wt_rev int(5) NOT NULL, primary key (dna, start) );" cat 3320924337_pileup.txt \ | /data/maqgene/bin/filter_maq_pileup 'AaCcGgTt,.' 2 \ | awk 'BEGIN { OFS="\t" } { $5=substr($5,1,100); print $0 }' \ | /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "load data local infile '/dev/stdin' into table 3320924337_pileup (dna, @start, ref_base, read_depth, sample_reads, A_fwd, A_rev, C_fwd, C_rev, G_fwd, G_rev, T_fwd, T_rev, wt_fwd, wt_rev) set start = @start - 1; flush table 3320924337_pileup" # Mon Sep 17 22:56:16 BST 2012: Linking backend file 306782340_log.txt to /data/maqgene/out/example_user/CB7031/CB7031_log.txt # Mon Sep 17 22:56:16 BST 2012: Extracting point mutants from consensus ... /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "drop table if exists 306782340_snps; create table 306782340_snps ( id int(10) NOT NULL auto_increment, dna char(40) NOT NULL, start int(10) NOT NULL, end int(10) NOT NULL, indel_size int(5) NOT NULL, variant_type enum('indel', 'point') NOT NULL, ref_base char NOT NULL, sample_base char NOT NULL, snp_score int(5) NOT NULL, read_depth int(5) NOT NULL, loci_multiplicity double(5,2) NOT NULL, mapping_quality int(5) NOT NULL, neighbor_quality int(5) NOT NULL, primary key (id), unique key (dna, start), key (indel_size) )" ln -fs /data/maqgene/work/306782340_log.txt /data/maqgene/out/example_user/CB7031/CB7031_log.txt /data/maqgene/bin/maq cns2snp 306782340.cns \ | cat -b | /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "load data local infile '/dev/stdin' into table 306782340_snps (id, dna, @start, ref_base, sample_base, snp_score, read_depth, loci_multiplicity, mapping_quality, neighbor_quality) set start = @start - 1, end = @start, indel_size = 0, variant_type = 'point'; flush table 306782340_snps;" # Mon Sep 17 22:56:22 BST 2012: Extracting indels from consensus ... /data/maqgene/bin/maq indelsoa /data/maqgene/genomes/elegans.bfa 519659138.map \ | awk '{ if ($5+$6-$4 >= 5 && $4 <= 1) { print $0 } }' \ | /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "load data local infile '/dev/stdin' into table 306782340_snps (dna, @start, indel_size, @num_spanning_reads, @num_left_reads, @num_right_reads, @junk) set id = NULL, start = @start - 1, end = @start - 1 + if (indel_size > 0, 0, -indel_size), ref_base = 'X', sample_base = 'X', variant_type = 'indel', snp_score = -1000, read_depth = if (@num_left_reads < @num_right_reads, @num_left_reads, @num_right_reads), loci_multiplicity = -1000, mapping_quality = -1000, neighbor_quality = -1000; flush table 306782340_snps;" cat 3320924337_known_snps \ | awk 'BEGIN { OFS="\t" } { $5=substr($5,1,100); print $0 }' \ | /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "load data local infile '/dev/stdin' into table 3320924337_pileup (dna, @start, ref_base, read_depth, sample_reads, A_fwd, A_rev, C_fwd, C_rev, G_fwd, G_rev, T_fwd, T_rev, wt_fwd, wt_rev) set start = @start - 1; flush table 3320924337_pileup" touch 3320924337_pileup # Mon Sep 17 22:56:23 BST 2012: Writing snp read counts ... /usr/bin/mysql -Dmaqgenedb -umaqgene --skip-column-names --batch --raw --local-infile --column-names -e "select f.id, f.dna, f.start + 1 start, e.snp_name, e.snp_base, if(e.snp_base = 'a', p.A_fwd + p.A_rev, if (e.snp_base = 'c', p.C_fwd + p.C_rev, if (e.snp_base = 'g', p.G_fwd + p.G_rev, if (e.snp_base = 't', p.T_fwd + p.T_rev, -1)))) snp_reads, p.ref_base, p.wt_fwd + p.wt_rev wt_reads, p.read_depth, p.sample_reads from elegans_features f join elegans_snp_changes e on (f.attribute = e.snp_name) join 3320924337_pileup p using (dna,start)" > /data/maqgene/out/example_user/CB7031/CB7031_snp_read_counts.txt 0 snp read count lines written. # Mon Sep 17 22:57:19 BST 2012: Adding placeholders for known SNPs. cat 3320924337_known_snps \ | /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "load data local infile '/dev/stdin' ignore into table 306782340_snps (dna, @start, ref_base, read_depth, @junk) set id = NULL, start = @start - 1, end = @start - 1, variant_type = 'point', snp_score = -1000, loci_multiplicity = -1000, mapping_quality = -1000, neighbor_quality = -1000; flush table 306782340_snps;" Found 163548 variants. touch 306782340_snps # Mon Sep 17 22:57:20 BST 2012: Loading uncovered regions into table ... cat 3320924337_uncovered.txt \ | /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "drop table if exists 3320924337_uncovered; create table 3320924337_uncovered ( id int(10) NOT NULL auto_increment, dna char(40) NOT NULL, start int(10) NOT NULL, end int(10) NOT NULL, primary key (id) ) auto_increment = 163549; load data local infile '/dev/stdin' into table 3320924337_uncovered (dna, start, end)" # Mon Sep 17 22:57:20 BST 2012: Finding all genomic features overlapping variants... # Mon Sep 17 22:57:20 BST 2012: Getting masking regions /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "drop table if exists 306782340_rel_snps; CREATE TABLE 306782340_rel_snps ( association_id INT(10) NOT NULL, query_region_id INT(10) unsigned NOT NULL, target_region_id INT(10) NOT NULL, distance INT(10) NOT NULL, overlap INT(10) NOT NULL, same_strand enum('SAME', 'OPP') NOT NULL, num_regions_between INT(3) NOT NULL, PRIMARY KEY (association_id), KEY (query_region_id), KEY (target_region_id) )" /data/maqgene/bin/associate_regions -b -1 -c -1000000 -m 2 -o >(/usr/bin/mysql -Dmaqgenedb -umaqgene --skip-column-names --batch --raw --local-infile -e "drop table if exists 306782340_masked_ids; create table 306782340_masked_ids (query_region_id int(10) NOT NULL, primary key (query_region_id)); load data local infile '/dev/stdin' ignore into table 306782340_masked_ids (@j1, query_region_id, @j3, @j4, @j5, @j6, @j7)") -d /data/maqgene/genomes/elegans.dnas -p /data/maqgene/genomes -q <(/usr/bin/mysql -Dmaqgenedb -umaqgene --skip-column-names --batch --raw --local-infile -e "select id, 'elegans', dna, start, end, '+' from 306782340_snps") -t <(/usr/bin/mysql -Dmaqgenedb -umaqgene --skip-column-names --batch --raw --local-infile -e "select id,'elegans',dna,start,end,strand from elegans_features feature where feature.feature not in ('mRNA', 'intron')") 1>/dev/null; Uncovered region statistics: /data/maqgene/bin/associate_regions -b -1 -c -1000000 -m 0 -o >(/usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "load data local infile '/dev/stdin' into table 306782340_rel_snps; flush table 306782340_rel_snps") -d /data/maqgene/genomes/elegans.dnas -p /data/maqgene/genomes -q <(/usr/bin/mysql -Dmaqgenedb -umaqgene --skip-column-names --batch --raw --local-infile -e "select id, 'elegans', dna, start, end, '+' from 306782340_snps") -t <(/usr/bin/mysql -Dmaqgenedb -umaqgene --skip-column-names --batch --raw --local-infile -e "select id, 'elegans', dna,start,end,strand from elegans_features where feature in ('SNP','exon','five_prime_UTR','intron','mRNA','three_prime_UTR','ncRNA')") 1>/dev/null chromosome number_uncovered_regions total_uncovered_length I 299 25556 II 250 23386 III 283 23769 IV 153 13361 MtDNA 1 216 V 234 23836 X 57 10103 touch 3320924337_uncovered # Mon Sep 17 22:57:21 BST 2012: Finding mRNA features overlapping uncovered regions ... /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "drop table if exists 3320924337_uncovered_rel; CREATE TABLE 3320924337_uncovered_rel ( association_id INT(10) NOT NULL, query_region_id INT(10) unsigned NOT NULL, target_region_id INT(10) NOT NULL, distance INT(10) NOT NULL, overlap INT(10) NOT NULL, same_strand enum('SAME', 'OPP') NOT NULL, num_regions_between INT(3) NOT NULL, PRIMARY KEY (association_id), KEY (query_region_id), KEY (target_region_id) )" /data/maqgene/bin/associate_regions -b -1 -c -1000000 -m 0 -o >(/usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "load data local infile '/dev/stdin' into table 3320924337_uncovered_rel; flush table 3320924337_uncovered_rel") -d /data/maqgene/genomes/elegans.dnas -p /data/maqgene/genomes -q <(/usr/bin/mysql -Dmaqgenedb -umaqgene --skip-column-names --batch --raw --local-infile -e "select id, 'elegans', dna, start, end, '+' from 3320924337_uncovered") -t <(/usr/bin/mysql -Dmaqgenedb -umaqgene --skip-column-names --batch --raw --local-infile -e "select id, 'elegans', dna,start,end,strand from elegans_features where feature in ('SNP','exon','five_prime_UTR','intron','mRNA','three_prime_UTR','ncRNA')") 1>/dev/null touch 306782340_masked_ids touch 3320924337_uncovered_rel # Mon Sep 17 22:57:23 BST 2012: Calculating translation start offsets for uncovered regions... /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "drop table if exists 3320924337_offsets_uncovered; create table 3320924337_offsets_uncovered ( id int(10) NOT NULL, gene varchar(50) NOT NULL, 5prime_offset int(10) NOT NULL, 3prime_offset int(10) NOT NULL, total_length int(10) NOT NULL, boundary_type char(20) NOT NULL, primary key (id, gene, boundary_type) );" /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "insert into 3320924337_offsets_uncovered select reg.id, gene.attribute gene, if(exon.strand = '+', if(reg.start < min(exon.start), reg.start - min(exon.start), sum(if(reg.start < exon.end, reg.start, exon.end) - if(reg.start < exon.start, reg.start, exon.start))), if(max(exon.end) < reg.start, max(exon.end) - reg.start, sum(if(exon.end < reg.start, reg.start, exon.end) - if(exon.start < reg.start, reg.start, exon.start))) ), if(exon.strand = '+', if(max(exon.end) < reg.start, max(exon.end) - reg.start, sum(if(exon.end < reg.start, reg.start, exon.end) - if(exon.start < reg.start, reg.start, exon.start))), if(reg.start < min(exon.start), reg.start - min(exon.start), sum(if(reg.start < exon.end, reg.start, exon.end) - if(reg.start < exon.start, reg.start, exon.start)))), sum(exon.end - exon.start) total_length, 'start' boundary_type from 3320924337_uncovered reg join 3320924337_uncovered_rel rel on (reg.id = rel.query_region_id) join elegans_features gene on (rel.target_region_id = gene.id) join elegans_features exon on (exon.attribute = gene.attribute) where exon.feature = 'exon' and gene.feature = 'mRNA' group by reg.id, gene.attribute" touch 306782340_rel_snps # Mon Sep 17 22:57:24 BST 2012: Calculating translation start offsets... /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "drop table if exists 306782340_offsets_snps; create table 306782340_offsets_snps ( id int(10) NOT NULL, gene varchar(50) NOT NULL, 5prime_offset int(10) NOT NULL, 3prime_offset int(10) NOT NULL, total_length int(10) NOT NULL, boundary_type char(20) NOT NULL, primary key (id, gene, boundary_type) );" # Mon Sep 17 22:57:24 BST 2012: Getting intergenic regions /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "drop table if exists 306782340_rel_intergenic; create table 306782340_rel_intergenic like 306782340_rel_snps"; /data/maqgene/bin/associate_regions -b 50000 -c -1000000 -m 2 -o >(/usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "load data local infile '/dev/stdin' into table 306782340_rel_intergenic; flush table 306782340_rel_intergenic") -d /data/maqgene/genomes/elegans.dnas -p /data/maqgene/genomes -q <(/usr/bin/mysql -Dmaqgenedb -umaqgene --skip-column-names --batch --raw --local-infile -e "select id, 'elegans', dna, start, end, '+' from 306782340_snps") -t <(/usr/bin/mysql -Dmaqgenedb -umaqgene --skip-column-names --batch --raw --local-infile -e "select id, 'elegans',dna,start,end,strand from elegans_features feature where feature.feature = 'mRNA'") 1>/dev/null; /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "insert into 3320924337_offsets_uncovered select reg.id, gene.attribute gene, if(exon.strand = '+', if(reg.end < min(exon.start), reg.end - min(exon.start), sum(if(reg.end < exon.end, reg.end, exon.end) - if(reg.end < exon.start, reg.end, exon.start))), if(max(exon.end) < reg.end, max(exon.end) - reg.end, sum(if(exon.end < reg.end, reg.end, exon.end) - if(exon.start < reg.end, reg.end, exon.start))) ), if(exon.strand = '+', if(max(exon.end) < reg.end, max(exon.end) - reg.end, sum(if(exon.end < reg.end, reg.end, exon.end) - if(exon.start < reg.end, reg.end, exon.start))), if(reg.end < min(exon.start), reg.end - min(exon.start), sum(if(reg.end < exon.end, reg.end, exon.end) - if(reg.end < exon.start, reg.end, exon.start)))), sum(exon.end - exon.start) total_length, 'end' boundary_type from 3320924337_uncovered reg join 3320924337_uncovered_rel rel on (reg.id = rel.query_region_id) join elegans_features gene on (rel.target_region_id = gene.id) join elegans_features exon on (exon.attribute = gene.attribute) where exon.feature = 'exon' and gene.feature = 'mRNA' group by reg.id, gene.attribute" /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "insert into 306782340_offsets_snps select reg.id, gene.attribute gene, if(exon.strand = '+', if(reg.start < min(exon.start), reg.start - min(exon.start), sum(if(reg.start < exon.end, reg.start, exon.end) - if(reg.start < exon.start, reg.start, exon.start))), if(max(exon.end) < reg.start, max(exon.end) - reg.start, sum(if(exon.end < reg.start, reg.start, exon.end) - if(exon.start < reg.start, reg.start, exon.start))) ), if(exon.strand = '+', if(max(exon.end) < reg.start, max(exon.end) - reg.start, sum(if(exon.end < reg.start, reg.start, exon.end) - if(exon.start < reg.start, reg.start, exon.start))), if(reg.start < min(exon.start), reg.start - min(exon.start), sum(if(reg.start < exon.end, reg.start, exon.end) - if(reg.start < exon.start, reg.start, exon.start)))), sum(exon.end - exon.start) total_length, 'start' boundary_type from 306782340_snps reg join 306782340_rel_snps rel on (reg.id = rel.query_region_id) join elegans_features gene on (rel.target_region_id = gene.id) join elegans_features exon on (exon.attribute = gene.attribute) where exon.feature = 'exon' and gene.feature = 'mRNA' group by reg.id, gene.attribute" touch 3320924337_offsets_uncovered # Mon Sep 17 22:57:27 BST 2012: Filtering masked intergenic regions /usr/bin/mysql -Dmaqgenedb -umaqgene --skip-column-names --batch --raw --local-infile -e "delete i.* from 306782340_rel_intergenic i join 306782340_masked_ids m using (query_region_id)" touch 306782340_rel_intergenic # Mon Sep 17 22:57:32 BST 2012: Classifying intergenic relations /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "drop table if exists 306782340_intergenic_assoc; create table 306782340_intergenic_assoc ( id INT(10) NOT NULL, target_region_id INT(10) NOT NULL, relation enum ('upstream', 'downstream', 'into') NOT NULL, distance INT(10) NOT NULL, num_regions_between INT(3) NOT NULL, attribute varchar(50) NOT NULL, primary key (id, target_region_id), key (id, attribute) ); insert into 306782340_intergenic_assoc select snp.id, rel.target_region_id, if (rel.overlap > 0, 'into', if (feature.strand = '+', if (snp.end < feature.start, 'upstream', 'downstream'), if (feature.end < snp.start, 'upstream', 'downstream') ) ) relation, rel.distance, rel.num_regions_between, feature.attribute from 306782340_snps snp join 306782340_rel_intergenic rel on (snp.id = rel.query_region_id) join elegans_features feature on (rel.target_region_id = feature.id)" touch 306782340_intergenic_assoc touch 306782340_offsets_snps # Mon Sep 17 22:57:44 BST 2012: Getting codons affected by point mutations... /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "drop table if exists 306782340_codons; create table 306782340_codons ( id int(10) NOT NULL, gene varchar(50) NOT NULL, ref_codon char(4) NOT NULL, sam_codon char(4) NOT NULL, start char(1) NOT NULL, primary key (id, gene) ); insert into 306782340_codons select snp.id, off.gene, @ref_codon := substr(gene_dna.sequence, (floor(if(feature.strand = '+', off.5prime_offset, off.5prime_offset - 1 ) / 3 ) * 3 ) + 1, 3 ) ref_codon, insert(@ref_codon, (if(feature.strand = '+', off.5prime_offset, off.5prime_offset - 1) % 3) + 1, 1, if(feature.strand = '+', snp.sample_base, substr('ACGTTGCA', instr('ACGTTGCA', snp.sample_base)+4, 1) ) ) sam_codon, if((feature.strand = '+' && off.5prime_offset < 3) || (feature.strand = '-' && off.5prime_offset - 1 < 3), 'Y', 'N') start from 306782340_offsets_snps off join 306782340_snps snp on (off.id = snp.id) join elegans_coding_dna gene_dna using (gene) join elegans_features feature on (gene_dna.gene = feature.attribute and feature.start <= snp.start and feature.end >= snp.end) where snp.variant_type = 'point' and feature.feature = 'exon' and off.boundary_type = 'start' and off.5prime_offset >= 0 and off.3prime_offset >= 0;" touch 306782340_codons /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "drop table if exists 306782340_marked; create table 306782340_marked ( id int(10) NOT NULL, class varchar(100) NOT NULL, description varchar(100) NOT NULL, parent_feature varchar(100) NOT NULL, primary key (id, class, parent_feature) );" # Mon Sep 17 22:57:51 BST 2012: Finding exonic indels ... /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "insert into 306782340_marked select snp.id, if (snp.indel_size < 0, if (rel.overlap % 3 = 0, 'inframe', 'frameshift'), if (snp.indel_size % 3 = 0, 'inframe', 'frameshift')) class, if (rel.overlap != snp.end - snp.start, 'exon_boundary', 'none') description, feature.attribute parent_feature from 306782340_snps snp join 306782340_rel_snps rel on (snp.id = rel.query_region_id) join elegans_features feature on (rel.target_region_id = feature.id) where feature.feature = 'exon' and snp.variant_type = 'indel';" # Mon Sep 17 22:57:52 BST 2012: Finding coding variants ... /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "insert into 306782340_marked select trans.id, if (reference_codons.amino = sample_codons.amino, 'silent', if (reference_codons.amino3 = 'stop', 'readthrough', if (sample_codons.amino3 = 'stop', 'premature_stop', if (trans.start = 'Y' and sample_codons.is_start != 'Y', 'non_start', 'missense') ) ) ) class, concat(ref_codon,'->',sam_codon,'[', reference_codons.amino3,'->',sample_codons.amino3,']') description, trans.gene from 306782340_codons trans join elegans_genetic_code reference_codons on (trans.ref_codon = reference_codons.codon) join elegans_genetic_code sample_codons on (trans.sam_codon = sample_codons.codon);" # Mon Sep 17 22:57:52 BST 2012: Finding noncoding variants ... /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "insert into 306782340_marked select snp.id, feature.feature class, 'none' description, feature.attribute parent_feature from 306782340_snps snp join 306782340_rel_snps rel on (snp.id = rel.query_region_id) join elegans_features feature on (rel.target_region_id = feature.id) where feature.feature not in ('exon', 'intron', 'mRNA');" # Mon Sep 17 22:57:52 BST 2012: Finding splice site variants ... /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "insert into 306782340_marked select snp.id, if (feature.strand = '+', if (snp.start - feature.start < 2, 'splice_donor', 'splice_acceptor'), if (snp.start - feature.start < 2, 'splice_acceptor', 'splice_donor')) class, 'none' description, feature.attribute parent_feature from 306782340_snps snp join 306782340_rel_snps rel on (snp.id = rel.query_region_id) join elegans_features feature on (rel.target_region_id = feature.id) where feature.feature = 'intron' and rel.distance > -2;" # Mon Sep 17 22:57:53 BST 2012: Finding intergenic and intronic variants ... /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "insert into 306782340_marked select id, 'nongenic' class, concat(if (relation in ('upstream','into'), - distance, distance),' ',relation) description, attribute from 306782340_intergenic_assoc where ((relation = 'upstream' and num_regions_between = 0 and distance < 50000) or (relation = 'upstream' and num_regions_between < 2 and distance < 1000) or (relation = 'downstream' and num_regions_between = 0 and distance < 1000) or (relation = 'downstream' and num_regions_between < 2 and distance < 1000) or relation = 'into' );" # Mon Sep 17 22:57:55 BST 2012: Finding uncovered regions in genes ... /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "insert into 306782340_marked select b.id, 'uncovered', if(b.5prime_offset = e.5prime_offset, concat('non-exonic, ',unc.start - gene.start,' to ',unc.end - gene.start,' bp into'), concat( if(b.5prime_offset < 0, concat(-b.5prime_offset, ' bp upstream'), if(b.3prime_offset < 0, concat(-b.3prime_offset, ' bp downstream'), concat('codon ',floor(b.5prime_offset / 3) + 1) ) ), ' to ', if(e.3prime_offset < 0, concat(-e.3prime_offset, ' bp downstream'), if(e.5prime_offset < 0, concat(-e.5prime_offset, ' bp upstream'), concat('codon ',ceiling(e.5prime_offset / 3) + 1) ) ), ' (', format(100 * (if(e.5prime_offset < 0,0,e.5prime_offset) - (if(b.5prime_offset < 0,0,b.5prime_offset)) ) / b.total_length,0 ), ' % of ', floor(b.total_length/3), ' codons)' ) ) description, b.gene from 3320924337_uncovered unc join 3320924337_offsets_uncovered b using (id) join 3320924337_offsets_uncovered e using (id, gene) join elegans_features gene on (e.gene = gene.attribute) where b.boundary_type = 'start' and e.boundary_type = 'end' and gene.strand = '+' and gene.feature = 'mRNA' union select b.id, 'uncovered', if(b.5prime_offset = e.5prime_offset, concat('non-exonic, ',gene.end - unc.end,' to ',gene.end - unc.start,' bp into'), concat( if(e.5prime_offset < 0, concat(-e.5prime_offset, ' bp upstream'), if(e.3prime_offset < 0, concat(-e.3prime_offset, ' bp downstream'), concat('codon ',floor(e.5prime_offset / 3) + 1) ) ), ' to ', if(b.3prime_offset < 0, concat(-b.3prime_offset, ' bp downstream'), if(b.5prime_offset < 0, concat(-b.5prime_offset, ' bp upstream'), concat('codon ',ceiling(b.5prime_offset / 3) + 1) ) ), ' (', format(100 * (if(b.5prime_offset < 0,0,b.5prime_offset) - (if(e.5prime_offset < 0,0,e.5prime_offset)) ) / b.total_length,0 ), ' % of ', floor(b.total_length/3), ' codons)' ) ) description, b.gene from 3320924337_uncovered unc join 3320924337_offsets_uncovered b using (id) join 3320924337_offsets_uncovered e using (id, gene) join elegans_features gene on (e.gene = gene.attribute) where b.boundary_type = 'start' and e.boundary_type = 'end' and gene.strand = '-' and gene.feature = 'mRNA'" # Mon Sep 17 22:57:55 BST 2012: Variants found: +-----------------+-----------------+ | class | number_variants | +-----------------+-----------------+ | five_prime_UTR | 1294 | | frameshift | 27 | | inframe | 9 | | missense | 6196 | | ncRNA | 889 | | nongenic | 126925 | | non_start | 11 | | premature_stop | 687 | | readthrough | 20 | | silent | 7423 | | SNP | 1018 | | splice_acceptor | 47 | | splice_donor | 51 | | three_prime_UTR | 3583 | | uncovered | 889 | +-----------------+-----------------+ touch 306782340_marked # Mon Sep 17 22:57:56 BST 2012: Combining results ... /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "drop table if exists 306782340_combined; create table 306782340_combined (primary key (id, class, description), key (dna, start)) as select snp.id, 'CB7031' mutant_strain, snp.dna, snp.start + 1 start, snp.end - snp.start length, snp.ref_base reference_base, snp.sample_base sample_base, snp.snp_score consensus_score, snp.loci_multiplicity, snp.mapping_quality, snp.neighbor_quality, (pile.wt_fwd + pile.wt_rev) number_wildtype_reads, (snp.read_depth - (pile.wt_fwd + pile.wt_rev)) number_variant_reads, snp.read_depth sequencing_depth, pile.sample_reads, snp.variant_type, snp.indel_size, class.class, class.description, group_concat(class.parent_feature order by class.parent_feature) parent_features from 306782340_snps snp join 3320924337_pileup pile using (dna,start) join 306782340_marked class on (snp.id = class.id) left join elegans_per_locus using (dna,start) where (snp.variant_type = 'point' and (snp.snp_score >= 3 and snp.loci_multiplicity <= 10 and snp.neighbor_quality >= 0 and pile.read_depth >= 4 and ((pile.read_depth - pile.wt_fwd - pile.wt_rev) / pile.read_depth) >= 0.5) or snp.variant_type = 'indel' ) group by id, class, description union select unc.id id, 'CB7031' mutant_strain, unc.dna, unc.start + 1 start, unc.end - unc.start length, '-' reference_base, '-' sample_base, 0 consensus_score, 0, 0, 0, 0 number_wildtype_reads, 0 number_variant_reads, 0 sequencing_depth, '' sample_reads, 'uncovered', 0, class.class, class.description, group_concat(class.parent_feature order by class.parent_feature) parent_features from 3320924337_uncovered unc join 306782340_marked class on (unc.id = class.id) left join elegans_per_locus using (dna,start) group by id, class, description; alter table 306782340_combined order by dna, start, length" touch 306782340_combined # Mon Sep 17 22:57:57 BST 2012: Writing results to grouped file ... /usr/bin/mysql -Dmaqgenedb -umaqgene --skip-column-names --batch --raw --local-infile --column-names -e "select id, mutant_strain, dna, start, length, reference_base, sample_base, consensus_score, loci_multiplicity, mapping_quality, neighbor_quality, number_wildtype_reads, number_variant_reads, sequencing_depth, sample_reads, variant_type, indel_size, group_concat(class) classes, group_concat(description) descriptions, group_concat(concat('{',parent_features,'}')) parent_features from 306782340_combined group by id order by dna, start;" > /data/maqgene/out/example_user/CB7031/CB7031_grouped.txt # Mon Sep 17 22:57:57 BST 2012: Writing results to flat file ... /usr/bin/mysql -Dmaqgenedb -umaqgene --skip-column-names --batch --raw --local-infile --column-names -e "select * from 306782340_combined" > /data/maqgene/out/example_user/CB7031/CB7031_flat.txt 4240 lines written. 2768 lines written. rm 519659138.00002.unmapped 519659138.00020.unmapped 519659138.00013.2.bfq 519659138.00017.1.fastq 519659138.00014.map 519659138.00013.mismatch 519659138.00004.mismatch 519659138.00001.2.fastq 519659138.00010.2.fastq 519659138.00002.1.fastq 519659138.00020.1.fastq 519659138.00009.unmapped 519659138.00019.1.bfq 519659138.00016.2.fastq 519659138.00015.1.bfq 519659138.00011.2.fastq 519659138.00012.1.fastq 519659138.00005.mismatch 519659138.00017.2.fastq 519659138.00018.unmapped 519659138.00002.2.fastq 519659138.00020.2.fastq 519659138.00019.2.bfq 519659138.00018.1.fastq 519659138.00019.mismatch 519659138.00011.1.bfq 519659138.00015.2.bfq 519659138.00012.2.fastq 519659138.00015.mismatch 519659138.00007.1.fastq 519659138.00003.unmapped 519659138.00006.map 519659138.00003.1.fastq 519659138.00018.2.fastq 519659138.00004.1.bfq 519659138.00013.unmapped 519659138.00000.map 519659138.00013.1.fastq 519659138.00008.1.fastq 519659138.00001.map 519659138.00010.map 519659138.00014.1.bfq 519659138.00003.2.fastq 519659138.00016.map 519659138.00005.unmapped 519659138.00004.2.bfq 519659138.00005.1.fastq 519659138.00011.map 519659138.00007.map 519659138.00013.2.fastq 519659138.00019.unmapped 519659138.00017.map 519659138.00008.map 519659138.00019.1.fastq 519659138.00002.map 519659138.00020.map 519659138.00015.unmapped 519659138.00009.1.fastq 519659138.00014.mismatch 519659138.00015.1.fastq 519659138.00007.2.fastq 519659138.00005.2.fastq 519659138.00012.map 3069955231_split1 519659138.00010.mismatch 519659138.00007.1.bfq 519659138.00019.2.fastq 519659138.00006.1.bfq 519659138.00009.map 519659138.00018.map 519659138.00015.2.fastq 519659138.00001.mismatch 519659138.00008.2.fastq 519659138.00000.1.bfq 519659138.00002.1.bfq 519659138.00004.unmapped 519659138.00003.map 519659138.00001.1.bfq 519659138.00010.1.bfq 519659138.00008.1.bfq 519659138.00004.1.fastq 519659138.00006.mismatch 519659138.00005.2.bfq 519659138.00016.1.bfq 519659138.00000.2.bfq 519659138.00014.unmapped 519659138.00013.map 519659138.00000.mismatch 519659138.00009.2.fastq 519659138.00014.1.fastq 519659138.00006.2.bfq 519659138.00017.1.bfq 519659138.00001.2.bfq 519659138.00010.2.bfq 519659138.00020.1.bfq 519659138.00004.2.fastq 3069955231_split2 519659138.00016.2.bfq 519659138.00009.1.bfq 519659138.00016.mismatch 519659138.00005.map 519659138.00007.mismatch 519659138.00007.2.bfq 519659138.00011.2.bfq 519659138.00012.1.bfq 519659138.00011.mismatch 519659138.00014.2.fastq 519659138.00019.map 519659138.00017.2.bfq 519659138.00017.mismatch 519659138.00002.2.bfq 519659138.00020.2.bfq 519659138.00002.mismatch 519659138.00015.map 519659138.00020.mismatch 519659138.00018.1.bfq 519659138.00006.unmapped 519659138.00007.unmapped 519659138.00012.2.bfq 519659138.00008.mismatch 519659138.00008.2.bfq 519659138.00012.mismatch 519659138.00006.1.fastq 519659138.00000.unmapped 519659138.00003.1.bfq 519659138.00018.2.bfq 519659138.00000.1.fastq 519659138.00018.mismatch 519659138.00001.unmapped 519659138.00010.unmapped 519659138.00013.1.bfq 519659138.00014.2.bfq 519659138.00016.unmapped 519659138.00008.unmapped 519659138.00009.mismatch 519659138.00006.2.fastq 519659138.00001.1.fastq 519659138.00010.1.fastq 519659138.00009.2.bfq 519659138.00016.1.fastq 519659138.00004.map 519659138.00003.2.bfq 519659138.00011.unmapped 519659138.00003.mismatch 519659138.00000.2.fastq 519659138.00012.unmapped 519659138.00011.1.fastq 519659138.00017.unmapped 519659138.00005.1.bfq make: Leaving directory `/data/maqgene/work' make: Entering directory `/data/maqgene/work' # Mon Sep 17 22:58:00 BST 2012: Cleaning intermediate data ... /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "drop table if exists 306782340_snps,306782340_rel,306782340_masked_ids,306782340_rel_intergenic,306782340_intergenic_assoc,306782340_offsets,306782340_codons,306782340_marked,306782340_combined,3320924337_pileup" rm -f 306782340_snps 306782340_rel 306782340_masked_ids 306782340_rel_intergenic 306782340_intergenic_assoc 306782340_offsets 306782340_codons 306782340_marked 306782340_combined 3320924337_pileup make: Leaving directory `/data/maqgene/work'