/data/maqgene/bin/run_maq.sh: line 44: export: `single_CB1122-2/./WTCHG_54189_02_1.fastq=on': not a valid identifier /data/maqgene/bin/run_maq.sh: line 44: export: `single_CB1122-2/./WTCHG_54189_02_2.fastq=on': not a valid identifier Finished reading input. ++ echo CB1122-2/./WTCHG_54189_02_1.fastq,CB1122-2/./WTCHG_54189_02_2.fastq ++ tr , ' ' + fastq_file_set1='CB1122-2/./WTCHG_54189_02_1.fastq CB1122-2/./WTCHG_54189_02_2.fastq' ++ cd /data/maqgene/reads ++ cat CB1122-2/./WTCHG_54189_02_1.fastq CB1122-2/./WTCHG_54189_02_2.fastq ++ wc -l ++ cut -f 1 -d ' ' cat: CB1122-2/./WTCHG_54189_02_2.fastq: Permission denied + TOTAL_LINES=120730748 ++ echo '(120730748 / 5000000) + (120730748 % 5000000 != 0)' ++ bc + num_chunks=25 + set +x make -j 14 -l 14 -C /data/maqgene/work --warn-undefined-variables -I /data/maqgene -f /data/maqgene/makefile BFA_FILE=elegans.bfa MAQGENE_GENOME_DIR=/data/maqgene/genomes reads_cksum=3538352084 map_cksum=800044356 pileup_cksum=1449392424 cns_cksum=2232948069 umdd=50000 dmdd=1000 umid=1000 dmid=1000 max_gene_radius=50000 full_results_dir=/data/maqgene/out/example_user/CB1122_2 outfile_basename=CB1122_2 map_parameters=" -m 0.00001 -C 250 -n 2 -e 100 -1 0" assemble_parameters=" -N 2 -q 0 -r 0.0 -m 7 -Q 100 -p " pileup_parameters=" -Q 100 -m 7 -q 0" CHUNK_SIZE=5000000 NUM_CHUNKS=25 Will process 120730748 lines of input in 25 chunks make: Entering directory `/data/maqgene/work' # Sun Apr 21 21:36:30 BST 2013: Removing frontend files in case this is a duplicate run ... rm -f /data/maqgene/out/example_user/CB1122_2/CB1122_2_{grouped.txt,flat.txt} rm -f /data/maqgene/out/example_user/CB1122_2/CB1122_2_uncovered.txt /data/maqgene/out/example_user/CB1122_2/CB1122_2_coverage.txt /data/maqgene/out/example_user/CB1122_2/CB1122_2_pileup.txt /data/maqgene/out/example_user/CB1122_2/CB1122_2_log.txt /data/maqgene/out/example_user/CB1122_2/CB1122_2_check.txt /data/maqgene/out/example_user/CB1122_2/CB1122_2_unmapped.txt make: Leaving directory `/data/maqgene/work' make: Entering directory `/data/maqgene/work' /data/maqgene/makefile:73: warning: undefined variable `date' # : Regrouping fastq reads into chunks of size 5000000. split -l 5000000 -a 5 -d <(cat /data/maqgene/reads/CB1122-2/./WTCHG_54189_02_1.fastq /data/maqgene/reads/CB1122-2/./WTCHG_54189_02_2.fastq) 800044356.1.fastq. cat: /data/maqgene/reads/CB1122-2/./WTCHG_54189_02_2.fastq: Permission denied for stem in 00000 00001 00002 00003 00004 00005 00006 00007 00008 00009 00010 00011 00012 00013 00014 00015 00016 00017 00018 00019 00020 00021 00022 00023 00024; do mv 800044356.1.fastq.$stem 800044356.$stem.1.fastq; done touch 3538352084_split1 # Sun Apr 21 21:36:48 BST 2013: Converting fastq files to bfq ... # Sun Apr 21 21:36:48 BST 2013: Converting fastq files to bfq ... # Sun Apr 21 21:36:48 BST 2013: Converting fastq files to bfq ... ut | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 800044356.00002.1.bfq # Sun Apr 21 21:36:48 BST 2013: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 800044356.00005.1.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 800044356.00005.1.bfq /data/maqgene/bin/maq sol2sanger 800044356.00003.1.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 800044356.00003.1.bfq /data/maqgene/bin/maq sol2sanger 800044356.00004.1.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 800044356.00004.1.bfq # Sun Apr 21 21:36:48 BST 2013: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 800044356.00001.1.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 800044356.00001.1.bfq # Sun Apr 21 21:36:48 BST 2013: Converting fastq files to bfq ... # Sun Apr 21 21:36:48 BST 2013: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 800044356.00000.1.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 800044356.00000.1.bfq # Sun Apr 21 21:36:48 BST 2013: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 800044356.00006.1.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 800044356.00006.1.bfq /data/maqgene/bin/maq sol2sanger 800044356.00009.1.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 800044356.00009.1.bfq # Sun Apr 21 21:36:48 BST 2013: Converting fastq files to bfq ... # Sun Apr 21 21:36:48 BST 2013: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 800044356.00007.1.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 800044356.00007.1.bfq /data/maqgene/bin/maq sol2sanger 800044356.00008.1.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 800044356.00008.1.bfq # Sun Apr 21 21:36:48 BST 2013: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 800044356.00010.1.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 800044356.00010.1.bfq # Sun Apr 21 21:36:48 BST 2013: Converting fastq files to bfq ... # Sun Apr 21 21:36:48 BST 2013: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 800044356.00011.1.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 800044356.00011.1.bfq /data/maqgene/bin/maq sol2sanger 800044356.00012.1.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 800044356.00012.1.bfq # Sun Apr 21 21:36:49 BST 2013: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 800044356.00013.1.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 800044356.00013.1.bfq -- finish writing file '800044356.00000.1.bfq' -- 1250000 sequences were loaded. -- finish writing file '800044356.00006.1.bfq' -- 1250000 sequences were loaded. -- finish writing file '800044356.00010.1.bfq' -- 1250000 sequences were loaded. -- finish writing file '800044356.00009.1.bfq' -- 1250000 sequences were loaded. -- finish writing file '800044356.00002.1.bfq' -- 1250000 sequences were loaded. -- finish writing file '800044356.00012.1.bfq' -- 1250000 sequences were loaded. -- finish writing file '800044356.00007.1.bfq' -- 1250000 sequences were loaded. -- finish writing file '800044356.00001.1.bfq' -- 1250000 sequences were loaded. -- finish writing file '800044356.00005.1.bfq' -- 1250000 sequences were loaded. -- finish writing file '800044356.00013.1.bfq' -- 1250000 sequences were loaded. -- finish writing file '800044356.00004.1.bfq' -- 1250000 sequences were loaded. -- finish writing file '800044356.00008.1.bfq' -- 1250000 sequences were loaded. -- finish writing file '800044356.00003.1.bfq' -- 1250000 sequences were loaded. -- finish writing file '800044356.00011.1.bfq' -- 1250000 sequences were loaded. # Sun Apr 21 21:37:49 BST 2013: Mapping file(s) 800044356.00001.1.bfq /data/maqgene/bin/maq map -m 0.00001 -C 250 -n 2 -e 100 -1 0 -u 800044356.00001.unmapped -H 800044356.00001.mismatch \ 800044356.00001.map /data/maqgene/genomes/elegans.bfa 800044356.00001.1.bfq 2> /dev/null # Sun Apr 21 21:38:57 BST 2013: Mapping file(s) 800044356.00002.1.bfq /data/maqgene/bin/maq map -m 0.00001 -C 250 -n 2 -e 100 -1 0 -u 800044356.00002.unmapped -H 800044356.00002.mismatch \ 800044356.00002.map /data/maqgene/genomes/elegans.bfa 800044356.00002.1.bfq 2> /dev/null # Sun Apr 21 21:38:58 BST 2013: Mapping file(s) 800044356.00000.1.bfq /data/maqgene/bin/maq map -m 0.00001 -C 250 -n 2 -e 100 -1 0 -u 800044356.00000.unmapped -H 800044356.00000.mismatch \ 800044356.00000.map /data/maqgene/genomes/elegans.bfa 800044356.00000.1.bfq 2> /dev/null # Sun Apr 21 21:38:59 BST 2013: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 800044356.00024.1.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 800044356.00024.1.bfq # Sun Apr 21 21:39:00 BST 2013: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 800044356.00023.1.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 800044356.00023.1.bfq # Sun Apr 21 21:39:01 BST 2013: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 800044356.00022.1.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 800044356.00022.1.bfq -- finish writing file '800044356.00024.1.bfq' -- 182687 sequences were loaded. -- finish writing file '800044356.00022.1.bfq' -- 1250000 sequences were loaded. -- finish writing file '800044356.00023.1.bfq' -- 1250000 sequences were loaded. # Sun Apr 21 21:40:31 BST 2013: Mapping file(s) 800044356.00005.1.bfq /data/maqgene/bin/maq map -m 0.00001 -C 250 -n 2 -e 100 -1 0 -u 800044356.00005.unmapped -H 800044356.00005.mismatch \ 800044356.00005.map /data/maqgene/genomes/elegans.bfa 800044356.00005.1.bfq 2> /dev/null # Sun Apr 21 21:40:33 BST 2013: Mapping file(s) 800044356.00004.1.bfq /data/maqgene/bin/maq map -m 0.00001 -C 250 -n 2 -e 100 -1 0 -u 800044356.00004.unmapped -H 800044356.00004.mismatch \ 800044356.00004.map /data/maqgene/genomes/elegans.bfa 800044356.00004.1.bfq 2> /dev/null # Sun Apr 21 21:40:35 BST 2013: Mapping file(s) 800044356.00003.1.bfq /data/maqgene/bin/maq map -m 0.00001 -C 250 -n 2 -e 100 -1 0 -u 800044356.00003.unmapped -H 800044356.00003.mismatch \ 800044356.00003.map /data/maqgene/genomes/elegans.bfa 800044356.00003.1.bfq 2> /dev/null # Sun Apr 21 21:47:07 BST 2013: Mapping file(s) 800044356.00007.1.bfq /data/maqgene/bin/maq map -m 0.00001 -C 250 -n 2 -e 100 -1 0 -u 800044356.00007.unmapped -H 800044356.00007.mismatch \ 800044356.00007.map /data/maqgene/genomes/elegans.bfa 800044356.00007.1.bfq 2> /dev/null # Sun Apr 21 21:47:09 BST 2013: Mapping file(s) 800044356.00006.1.bfq /data/maqgene/bin/maq map -m 0.00001 -C 250 -n 2 -e 100 -1 0 -u 800044356.00006.unmapped -H 800044356.00006.mismatch \ 800044356.00006.map /data/maqgene/genomes/elegans.bfa 800044356.00006.1.bfq 2> /dev/null # Sun Apr 21 21:47:11 BST 2013: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 800044356.00021.1.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 800044356.00021.1.bfq -- finish writing file '800044356.00021.1.bfq' -- 1250000 sequences were loaded. # Sun Apr 21 21:49:03 BST 2013: Mapping file(s) 800044356.00010.1.bfq /data/maqgene/bin/maq map -m 0.00001 -C 250 -n 2 -e 100 -1 0 -u 800044356.00010.unmapped -H 800044356.00010.mismatch \ 800044356.00010.map /data/maqgene/genomes/elegans.bfa 800044356.00010.1.bfq 2> /dev/null # Sun Apr 21 21:49:04 BST 2013: Mapping file(s) 800044356.00009.1.bfq /data/maqgene/bin/maq map -m 0.00001 -C 250 -n 2 -e 100 -1 0 -u 800044356.00009.unmapped -H 800044356.00009.mismatch \ 800044356.00009.map /data/maqgene/genomes/elegans.bfa 800044356.00009.1.bfq 2> /dev/null # Sun Apr 21 21:49:05 BST 2013: Mapping file(s) 800044356.00008.1.bfq /data/maqgene/bin/maq map -m 0.00001 -C 250 -n 2 -e 100 -1 0 -u 800044356.00008.unmapped -H 800044356.00008.mismatch \ 800044356.00008.map /data/maqgene/genomes/elegans.bfa 800044356.00008.1.bfq 2> /dev/null # Sun Apr 21 21:49:06 BST 2013: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 800044356.00020.1.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 800044356.00020.1.bfq -- finish writing file '800044356.00020.1.bfq' -- 1250000 sequences were loaded. # Sun Apr 21 21:52:09 BST 2013: Mapping file(s) 800044356.00020.1.bfq /data/maqgene/bin/maq map -m 0.00001 -C 250 -n 2 -e 100 -1 0 -u 800044356.00020.unmapped -H 800044356.00020.mismatch \ 800044356.00020.map /data/maqgene/genomes/elegans.bfa 800044356.00020.1.bfq 2> /dev/null # Sun Apr 21 21:52:10 BST 2013: Mapping file(s) 800044356.00013.1.bfq /data/maqgene/bin/maq map -m 0.00001 -C 250 -n 2 -e 100 -1 0 -u 800044356.00013.unmapped -H 800044356.00013.mismatch \ 800044356.00013.map /data/maqgene/genomes/elegans.bfa 800044356.00013.1.bfq 2> /dev/null # Sun Apr 21 21:52:12 BST 2013: Mapping file(s) 800044356.00012.1.bfq /data/maqgene/bin/maq map -m 0.00001 -C 250 -n 2 -e 100 -1 0 -u 800044356.00012.unmapped -H 800044356.00012.mismatch \ 800044356.00012.map /data/maqgene/genomes/elegans.bfa 800044356.00012.1.bfq 2> /dev/null # Sun Apr 21 21:58:29 BST 2013: Mapping file(s) 800044356.00024.1.bfq /data/maqgene/bin/maq map -m 0.00001 -C 250 -n 2 -e 100 -1 0 -u 800044356.00024.unmapped -H 800044356.00024.mismatch \ 800044356.00024.map /data/maqgene/genomes/elegans.bfa 800044356.00024.1.bfq 2> /dev/null # Sun Apr 21 21:58:40 BST 2013: Mapping file(s) 800044356.00022.1.bfq /data/maqgene/bin/maq map -m 0.00001 -C 250 -n 2 -e 100 -1 0 -u 800044356.00023.unmapped -H 800044356.00023.mismatch \ 800044356.00023.map /data/maqgene/genomes/elegans.bfa 800044356.00023.1.bfq 2> /dev/null /data/maqgene/bin/maq map -m 0.00001 -C 250 -n 2 -e 100 -1 0 -u 800044356.00022.unmapped -H 800044356.00022.mismatch \ 800044356.00022.map /data/maqgene/genomes/elegans.bfa 800044356.00022.1.bfq 2> /dev/null /data/maqgene/bin/maq map -m 0.00001 -C 250 -n 2 -e 100 -1 0 -u 800044356.00021.unmapped -H 800044356.00021.mismatch \ 800044356.00021.map /data/maqgene/genomes/elegans.bfa 800044356.00021.1.bfq 2> /dev/null # Sun Apr 21 21:59:11 BST 2013: Mapping file(s) 800044356.00011.1.bfq /data/maqgene/bin/maq map -m 0.00001 -C 250 -n 2 -e 100 -1 0 -u 800044356.00011.unmapped -H 800044356.00011.mismatch \ 800044356.00011.map /data/maqgene/genomes/elegans.bfa 800044356.00011.1.bfq 2> /dev/null # Sun Apr 21 21:59:11 BST 2013: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 800044356.00019.1.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 800044356.00019.1.bfq # Sun Apr 21 21:59:11 BST 2013: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 800044356.00018.1.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 800044356.00018.1.bfq -- finish writing file '800044356.00018.1.bfq' -- 1250000 sequences were loaded. -- finish writing file '800044356.00019.1.bfq' -- 1250000 sequences were loaded. # Sun Apr 21 22:04:09 BST 2013: Mapping file(s) 800044356.00018.1.bfq # Sun Apr 21 22:04:09 BST 2013: Mapping file(s) 800044356.00019.1.bfq 356.00018.unmapped -H 800044356.00018.mismatch \ 800044356.00018.map /data/maqgene/genomes/elegans.bfa 800044356.00018.1.bfq 2> /dev/null /data/maqgene/bin/maq map -m 0.00001 -C 250 -n 2 -e 100 -1 0 -u 800044356.00019.unmapped -H 800044356.00019.mismatch \ 800044356.00019.map /data/maqgene/genomes/elegans.bfa 800044356.00019.1.bfq 2> /dev/null # Sun Apr 21 22:07:41 BST 2013: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 800044356.00017.1.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 800044356.00017.1.bfq # Sun Apr 21 22:07:58 BST 2013: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 800044356.00016.1.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 800044356.00016.1.bfq -- finish writing file '800044356.00017.1.bfq' -- 1250000 sequences were loaded. # Sun Apr 21 22:08:14 BST 2013: Converting fastq files to bfq ... /data/maqgene/bin/maq sol2sanger 800044356.00015.1.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 800044356.00015.1.bfq -- finish writing file '800044356.00016.1.bfq' -- 1250000 sequences were loaded. # Sun Apr 21 22:08:31 BST 2013: Mapping file(s) 800044356.00017.1.bfq /data/maqgene/bin/maq map -m 0.00001 -C 250 -n 2 -e 100 -1 0 -u 800044356.00017.unmapped -H 800044356.00017.mismatch \ 800044356.00017.map /data/maqgene/genomes/elegans.bfa 800044356.00017.1.bfq 2> /dev/null -- finish writing file '800044356.00015.1.bfq' -- 1250000 sequences were loaded. # Sun Apr 21 22:08:46 BST 2013: Mapping file(s) 800044356.00016.1.bfq /data/maqgene/bin/maq map -m 0.00001 -C 250 -n 2 -e 100 -1 0 -u 800044356.00016.unmapped -H 800044356.00016.mismatch \ 800044356.00016.map /data/maqgene/genomes/elegans.bfa 800044356.00016.1.bfq 2> /dev/null # Sun Apr 21 22:09:33 BST 2013: Mapping file(s) 800044356.00015.1.bfq # Sun Apr 21 22:09:33 BST 2013: Converting fastq files to bfq ... /data/maqgene/bin/maq map -m 0.00001 -C 250 -n 2 -e 100 -1 0 -u 800044356.00015.unmapped -H 800044356.00015.mismatch \ 800044356.00015.map /data/maqgene/genomes/elegans.bfa 800044356.00015.1.bfq 2> /dev/null /data/maqgene/bin/maq sol2sanger 800044356.00014.1.fastq /dev/stdout | \ /data/maqgene/bin/maq fastq2bfq /dev/stdin 800044356.00014.1.bfq -- finish writing file '800044356.00014.1.bfq' -- 1250000 sequences were loaded. # Sun Apr 21 22:10:01 BST 2013: Mapping file(s) 800044356.00014.1.bfq /data/maqgene/bin/maq map -m 0.00001 -C 250 -n 2 -e 100 -1 0 -u 800044356.00014.unmapped -H 800044356.00014.mismatch \ 800044356.00014.map /data/maqgene/genomes/elegans.bfa 800044356.00014.1.bfq 2> /dev/null # Sun Apr 21 22:18:02 BST 2013: Merging all maps ... /data/maqgene/bin/maq mapmerge 800044356.map 800044356.00000.map 800044356.00001.map 800044356.00002.map 800044356.00003.map 800044356.00004.map 800044356.00005.map 800044356.00006.map 800044356.00007.map 800044356.00008.map 800044356.00009.map 800044356.00010.map 800044356.00011.map 800044356.00012.map 800044356.00013.map 800044356.00014.map 800044356.00015.map 800044356.00016.map 800044356.00017.map 800044356.00018.map 800044356.00019.map 800044356.00020.map 800044356.00021.map 800044356.00022.map 800044356.00023.map 800044356.00024.map # Sun Apr 21 22:18:02 BST 2013: Merging all *.unmapped files ... cat 800044356.00000.unmapped 800044356.00001.unmapped 800044356.00002.unmapped 800044356.00003.unmapped 800044356.00004.unmapped 800044356.00005.unmapped 800044356.00006.unmapped 800044356.00007.unmapped 800044356.00008.unmapped 800044356.00009.unmapped 800044356.00010.unmapped 800044356.00011.unmapped 800044356.00012.unmapped 800044356.00013.unmapped 800044356.00014.unmapped 800044356.00015.unmapped 800044356.00016.unmapped 800044356.00017.unmapped 800044356.00018.unmapped 800044356.00019.unmapped 800044356.00020.unmapped 800044356.00021.unmapped 800044356.00022.unmapped 800044356.00023.unmapped 800044356.00024.unmapped > 800044356_unmapped.txt # Sun Apr 21 22:18:04 BST 2013: Linking backend file 800044356_unmapped.txt to /data/maqgene/out/example_user/CB1122_2/CB1122_2_unmapped.txt ln -fs /data/maqgene/work/800044356_unmapped.txt /data/maqgene/out/example_user/CB1122_2/CB1122_2_unmapped.txt /data/maqgene/makefile:126: warning: undefined variable `mapcheck_parameters' # Sun Apr 21 22:25:41 BST 2013: Generating consensus ... # Sun Apr 21 22:25:41 BST 2013: Creating pileup ... /data/maqgene/bin/maq pileup -Q 100 -m 7 -q 0 /data/maqgene/genomes/elegans.bfa 800044356.map > 1449392424_pileup.txt /data/maqgene/bin/maq assemble -N 2 -q 0 -r 0.0 -m 7 -Q 100 -p 2232948069.cns \ /data/maqgene/genomes/elegans.bfa 800044356.map 2> 2232948069_log.txt # Sun Apr 21 22:25:41 BST 2013: Running 'mapcheck' ... /data/maqgene/bin/maq mapcheck /data/maqgene/genomes/elegans.bfa 800044356.map > 800044356_check.txt \ 2>/dev/null # Sun Apr 21 22:26:55 BST 2013: Linking backend file 800044356_check.txt to /data/maqgene/out/example_user/CB1122_2/CB1122_2_check.txt ln -fs /data/maqgene/work/800044356_check.txt /data/maqgene/out/example_user/CB1122_2/CB1122_2_check.txt /data/maqgene/bin/filter_matching_lines 1449392424_pileup.txt "%s\t%i\t" \ <(/usr/bin/mysql -Dmaqgenedb -umaqgene --skip-column-names --batch --raw --local-infile -e "select distinct dna, start + 1 from elegans_features where feature = 'SNP' order by dna, start") "%s\t%i\n" si \ | /data/maqgene/bin/filter_maq_pileup 'AaCcGgTt,.' 0 \ > 1449392424_known_snps # Sun Apr 21 22:28:19 BST 2013: Getting uncovered regions ... cut -f 1,2,4 1449392424_pileup.txt \ | /data/maqgene/bin/get_uncovered_regions 50 \ > 1449392424_uncovered.txt # Sun Apr 21 22:28:19 BST 2013: Linking backend file 1449392424_pileup.txt to /data/maqgene/out/example_user/CB1122_2/CB1122_2_pileup.txt ln -fs /data/maqgene/work/1449392424_pileup.txt /data/maqgene/out/example_user/CB1122_2/CB1122_2_pileup.txt # Sun Apr 21 22:28:19 BST 2013: Making coverage histogram ... (echo -en "sequencing_depth\tnumber_of_bases\n"; \ cut -f 4 1449392424_pileup.txt \ | /data/maqgene/bin/pileup_histogram 100) > 1449392424_coverage.txt # Sun Apr 21 22:28:54 BST 2013: Linking backend file 1449392424_coverage.txt to /data/maqgene/out/example_user/CB1122_2/CB1122_2_coverage.txt ln -fs /data/maqgene/work/1449392424_coverage.txt /data/maqgene/out/example_user/CB1122_2/CB1122_2_coverage.txt # Sun Apr 21 22:29:03 BST 2013: Filtering and loading pileup for analysis ... /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "drop table if exists 1449392424_pileup; create table 1449392424_pileup ( dna enum('I','II','III','IV','MtDNA','V','X') NOT NULL, start int(10) NOT NULL, ref_base char(1) NOT NULL, read_depth int(5) NOT NULL, sample_reads varchar(100) NOT NULL, A_fwd int(5) NOT NULL, A_rev int(5) NOT NULL, C_fwd int(5) NOT NULL, C_rev int(5) NOT NULL, G_fwd int(5) NOT NULL, G_rev int(5) NOT NULL, T_fwd int(5) NOT NULL, T_rev int(5) NOT NULL, wt_fwd int(5) NOT NULL, wt_rev int(5) NOT NULL, primary key (dna, start) );" cat 1449392424_pileup.txt \ | /data/maqgene/bin/filter_maq_pileup 'AaCcGgTt,.' 2 \ | awk 'BEGIN { OFS="\t" } { $5=substr($5,1,100); print $0 }' \ | /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "load data local infile '/dev/stdin' into table 1449392424_pileup (dna, @start, ref_base, read_depth, sample_reads, A_fwd, A_rev, C_fwd, C_rev, G_fwd, G_rev, T_fwd, T_rev, wt_fwd, wt_rev) set start = @start - 1; flush table 1449392424_pileup" # Sun Apr 21 22:29:05 BST 2013: Linking backend file 1449392424_uncovered.txt to /data/maqgene/out/example_user/CB1122_2/CB1122_2_uncovered.txt ln -fs /data/maqgene/work/1449392424_uncovered.txt /data/maqgene/out/example_user/CB1122_2/CB1122_2_uncovered.txt cat 1449392424_known_snps \ | awk 'BEGIN { OFS="\t" } { $5=substr($5,1,100); print $0 }' \ | /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "load data local infile '/dev/stdin' into table 1449392424_pileup (dna, @start, ref_base, read_depth, sample_reads, A_fwd, A_rev, C_fwd, C_rev, G_fwd, G_rev, T_fwd, T_rev, wt_fwd, wt_rev) set start = @start - 1; flush table 1449392424_pileup" touch 1449392424_pileup # Sun Apr 21 22:29:54 BST 2013: Writing snp read counts ... /usr/bin/mysql -Dmaqgenedb -umaqgene --skip-column-names --batch --raw --local-infile --column-names -e "select f.id, f.dna, f.start + 1 start, e.snp_name, e.snp_base, if(e.snp_base = 'a', p.A_fwd + p.A_rev, if (e.snp_base = 'c', p.C_fwd + p.C_rev, if (e.snp_base = 'g', p.G_fwd + p.G_rev, if (e.snp_base = 't', p.T_fwd + p.T_rev, -1)))) snp_reads, p.ref_base, p.wt_fwd + p.wt_rev wt_reads, p.read_depth, p.sample_reads from elegans_features f join elegans_snp_changes e on (f.attribute = e.snp_name) join 1449392424_pileup p using (dna,start)" > /data/maqgene/out/example_user/CB1122_2/CB1122_2_snp_read_counts.txt 0 snp read count lines written. # Sun Apr 21 22:29:58 BST 2013: Extracting point mutants from consensus ... /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "drop table if exists 2232948069_snps; create table 2232948069_snps ( id int(10) NOT NULL auto_increment, dna char(40) NOT NULL, start int(10) NOT NULL, end int(10) NOT NULL, indel_size int(5) NOT NULL, variant_type enum('indel', 'point') NOT NULL, ref_base char NOT NULL, sample_base char NOT NULL, snp_score int(5) NOT NULL, read_depth int(5) NOT NULL, loci_multiplicity double(5,2) NOT NULL, mapping_quality int(5) NOT NULL, neighbor_quality int(5) NOT NULL, primary key (id), unique key (dna, start), key (indel_size) )" # Sun Apr 21 22:29:58 BST 2013: Linking backend file 2232948069_log.txt to /data/maqgene/out/example_user/CB1122_2/CB1122_2_log.txt ln -fs /data/maqgene/work/2232948069_log.txt /data/maqgene/out/example_user/CB1122_2/CB1122_2_log.txt /data/maqgene/bin/maq cns2snp 2232948069.cns \ | cat -b | /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "load data local infile '/dev/stdin' into table 2232948069_snps (id, dna, @start, ref_base, sample_base, snp_score, read_depth, loci_multiplicity, mapping_quality, neighbor_quality) set start = @start - 1, end = @start, indel_size = 0, variant_type = 'point'; flush table 2232948069_snps;" # Sun Apr 21 22:30:07 BST 2013: Extracting indels from consensus ... /data/maqgene/bin/maq indelsoa /data/maqgene/genomes/elegans.bfa 800044356.map \ | awk '{ if ($5+$6-$4 >= 5 && $4 <= 1) { print $0 } }' \ | /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "load data local infile '/dev/stdin' into table 2232948069_snps (dna, @start, indel_size, @num_spanning_reads, @num_left_reads, @num_right_reads, @junk) set id = NULL, start = @start - 1, end = @start - 1 + if (indel_size > 0, 0, -indel_size), ref_base = 'X', sample_base = 'X', variant_type = 'indel', snp_score = -1000, read_depth = if (@num_left_reads < @num_right_reads, @num_left_reads, @num_right_reads), loci_multiplicity = -1000, mapping_quality = -1000, neighbor_quality = -1000; flush table 2232948069_snps;" # Sun Apr 21 22:30:51 BST 2013: Adding placeholders for known SNPs. cat 1449392424_known_snps \ | /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "load data local infile '/dev/stdin' ignore into table 2232948069_snps (dna, @start, ref_base, read_depth, @junk) set id = NULL, start = @start - 1, end = @start - 1, variant_type = 'point', snp_score = -1000, loci_multiplicity = -1000, mapping_quality = -1000, neighbor_quality = -1000; flush table 2232948069_snps;" Found 164073 variants. touch 2232948069_snps # Sun Apr 21 22:30:55 BST 2013: Getting masking regions # Sun Apr 21 22:30:55 BST 2013: Finding all genomic features overlapping variants... cat 1449392424_uncovered.txt \ | /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "drop table if exists 1449392424_uncovered; create table 1449392424_uncovered ( id int(10) NOT NULL auto_increment, dna char(40) NOT NULL, start int(10) NOT NULL, end int(10) NOT NULL, primary key (id) ) auto_increment = 164074; load data local infile '/dev/stdin' into table 1449392424_uncovered (dna, start, end)" /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "drop table if exists 2232948069_rel_snps; CREATE TABLE 2232948069_rel_snps ( association_id INT(10) NOT NULL, query_region_id INT(10) unsigned NOT NULL, target_region_id INT(10) NOT NULL, distance INT(10) NOT NULL, overlap INT(10) NOT NULL, same_strand enum('SAME', 'OPP') NOT NULL, num_regions_between INT(3) NOT NULL, PRIMARY KEY (association_id), KEY (query_region_id), KEY (target_region_id) )" /data/maqgene/bin/associate_regions -b -1 -c -1000000 -m 2 -o >(/usr/bin/mysql -Dmaqgenedb -umaqgene --skip-column-names --batch --raw --local-infile -e "drop table if exists 2232948069_masked_ids; create table 2232948069_masked_ids (query_region_id int(10) NOT NULL, primary key (query_region_id)); load data local infile '/dev/stdin' ignore into table 2232948069_masked_ids (@j1, query_region_id, @j3, @j4, @j5, @j6, @j7)") -d /data/maqgene/genomes/elegans.dnas -p /data/maqgene/genomes -q <(/usr/bin/mysql -Dmaqgenedb -umaqgene --skip-column-names --batch --raw --local-infile -e "select id, 'elegans', dna, start, end, '+' from 2232948069_snps") -t <(/usr/bin/mysql -Dmaqgenedb -umaqgene --skip-column-names --batch --raw --local-infile -e "select id,'elegans',dna,start,end,strand from elegans_features feature where feature.feature not in ('mRNA', 'intron')") 1>/dev/null; /data/maqgene/bin/associate_regions -b -1 -c -1000000 -m 0 -o >(/usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "load data local infile '/dev/stdin' into table 2232948069_rel_snps; flush table 2232948069_rel_snps") -d /data/maqgene/genomes/elegans.dnas -p /data/maqgene/genomes -q <(/usr/bin/mysql -Dmaqgenedb -umaqgene --skip-column-names --batch --raw --local-infile -e "select id, 'elegans', dna, start, end, '+' from 2232948069_snps") -t <(/usr/bin/mysql -Dmaqgenedb -umaqgene --skip-column-names --batch --raw --local-infile -e "select id, 'elegans', dna,start,end,strand from elegans_features where feature in ('SNP','exon','five_prime_UTR','intron','mRNA','three_prime_UTR','ncRNA')") 1>/dev/null Uncovered region statistics: chromosome number_uncovered_regions total_uncovered_length I 6 521 II 19 4223 III 8 767 IV 13 1205 V 12 1324 X 11 5393 touch 1449392424_uncovered # Sun Apr 21 22:30:55 BST 2013: Finding mRNA features overlapping uncovered regions ... /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "drop table if exists 1449392424_uncovered_rel; CREATE TABLE 1449392424_uncovered_rel ( association_id INT(10) NOT NULL, query_region_id INT(10) unsigned NOT NULL, target_region_id INT(10) NOT NULL, distance INT(10) NOT NULL, overlap INT(10) NOT NULL, same_strand enum('SAME', 'OPP') NOT NULL, num_regions_between INT(3) NOT NULL, PRIMARY KEY (association_id), KEY (query_region_id), KEY (target_region_id) )" /data/maqgene/bin/associate_regions -b -1 -c -1000000 -m 0 -o >(/usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "load data local infile '/dev/stdin' into table 1449392424_uncovered_rel; flush table 1449392424_uncovered_rel") -d /data/maqgene/genomes/elegans.dnas -p /data/maqgene/genomes -q <(/usr/bin/mysql -Dmaqgenedb -umaqgene --skip-column-names --batch --raw --local-infile -e "select id, 'elegans', dna, start, end, '+' from 1449392424_uncovered") -t <(/usr/bin/mysql -Dmaqgenedb -umaqgene --skip-column-names --batch --raw --local-infile -e "select id, 'elegans', dna,start,end,strand from elegans_features where feature in ('SNP','exon','five_prime_UTR','intron','mRNA','three_prime_UTR','ncRNA')") 1>/dev/null touch 1449392424_uncovered_rel # Sun Apr 21 22:31:00 BST 2013: Calculating translation start offsets for uncovered regions... /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "drop table if exists 1449392424_offsets_uncovered; create table 1449392424_offsets_uncovered ( id int(10) NOT NULL, gene varchar(50) NOT NULL, 5prime_offset int(10) NOT NULL, 3prime_offset int(10) NOT NULL, total_length int(10) NOT NULL, boundary_type char(20) NOT NULL, primary key (id, gene, boundary_type) );" /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "insert into 1449392424_offsets_uncovered select reg.id, gene.attribute gene, if(exon.strand = '+', if(reg.start < min(exon.start), reg.start - min(exon.start), sum(if(reg.start < exon.end, reg.start, exon.end) - if(reg.start < exon.start, reg.start, exon.start))), if(max(exon.end) < reg.start, max(exon.end) - reg.start, sum(if(exon.end < reg.start, reg.start, exon.end) - if(exon.start < reg.start, reg.start, exon.start))) ), if(exon.strand = '+', if(max(exon.end) < reg.start, max(exon.end) - reg.start, sum(if(exon.end < reg.start, reg.start, exon.end) - if(exon.start < reg.start, reg.start, exon.start))), if(reg.start < min(exon.start), reg.start - min(exon.start), sum(if(reg.start < exon.end, reg.start, exon.end) - if(reg.start < exon.start, reg.start, exon.start)))), sum(exon.end - exon.start) total_length, 'start' boundary_type from 1449392424_uncovered reg join 1449392424_uncovered_rel rel on (reg.id = rel.query_region_id) join elegans_features gene on (rel.target_region_id = gene.id) join elegans_features exon on (exon.attribute = gene.attribute) where exon.feature = 'exon' and gene.feature = 'mRNA' group by reg.id, gene.attribute" touch 2232948069_masked_ids /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "insert into 1449392424_offsets_uncovered select reg.id, gene.attribute gene, if(exon.strand = '+', if(reg.end < min(exon.start), reg.end - min(exon.start), sum(if(reg.end < exon.end, reg.end, exon.end) - if(reg.end < exon.start, reg.end, exon.start))), if(max(exon.end) < reg.end, max(exon.end) - reg.end, sum(if(exon.end < reg.end, reg.end, exon.end) - if(exon.start < reg.end, reg.end, exon.start))) ), if(exon.strand = '+', if(max(exon.end) < reg.end, max(exon.end) - reg.end, sum(if(exon.end < reg.end, reg.end, exon.end) - if(exon.start < reg.end, reg.end, exon.start))), if(reg.end < min(exon.start), reg.end - min(exon.start), sum(if(reg.end < exon.end, reg.end, exon.end) - if(reg.end < exon.start, reg.end, exon.start)))), sum(exon.end - exon.start) total_length, 'end' boundary_type from 1449392424_uncovered reg join 1449392424_uncovered_rel rel on (reg.id = rel.query_region_id) join elegans_features gene on (rel.target_region_id = gene.id) join elegans_features exon on (exon.attribute = gene.attribute) where exon.feature = 'exon' and gene.feature = 'mRNA' group by reg.id, gene.attribute" touch 1449392424_offsets_uncovered touch 2232948069_rel_snps # Sun Apr 21 22:31:02 BST 2013: Getting intergenic regions # Sun Apr 21 22:31:02 BST 2013: Calculating translation start offsets... /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "drop table if exists 2232948069_rel_intergenic; create table 2232948069_rel_intergenic like 2232948069_rel_snps"; /data/maqgene/bin/associate_regions -b 50000 -c -1000000 -m 2 -o >(/usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "load data local infile '/dev/stdin' into table 2232948069_rel_intergenic; flush table 2232948069_rel_intergenic") -d /data/maqgene/genomes/elegans.dnas -p /data/maqgene/genomes -q <(/usr/bin/mysql -Dmaqgenedb -umaqgene --skip-column-names --batch --raw --local-infile -e "select id, 'elegans', dna, start, end, '+' from 2232948069_snps") -t <(/usr/bin/mysql -Dmaqgenedb -umaqgene --skip-column-names --batch --raw --local-infile -e "select id, 'elegans',dna,start,end,strand from elegans_features feature where feature.feature = 'mRNA'") 1>/dev/null; /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "drop table if exists 2232948069_offsets_snps; create table 2232948069_offsets_snps ( id int(10) NOT NULL, gene varchar(50) NOT NULL, 5prime_offset int(10) NOT NULL, 3prime_offset int(10) NOT NULL, total_length int(10) NOT NULL, boundary_type char(20) NOT NULL, primary key (id, gene, boundary_type) );" /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "insert into 2232948069_offsets_snps select reg.id, gene.attribute gene, if(exon.strand = '+', if(reg.start < min(exon.start), reg.start - min(exon.start), sum(if(reg.start < exon.end, reg.start, exon.end) - if(reg.start < exon.start, reg.start, exon.start))), if(max(exon.end) < reg.start, max(exon.end) - reg.start, sum(if(exon.end < reg.start, reg.start, exon.end) - if(exon.start < reg.start, reg.start, exon.start))) ), if(exon.strand = '+', if(max(exon.end) < reg.start, max(exon.end) - reg.start, sum(if(exon.end < reg.start, reg.start, exon.end) - if(exon.start < reg.start, reg.start, exon.start))), if(reg.start < min(exon.start), reg.start - min(exon.start), sum(if(reg.start < exon.end, reg.start, exon.end) - if(reg.start < exon.start, reg.start, exon.start)))), sum(exon.end - exon.start) total_length, 'start' boundary_type from 2232948069_snps reg join 2232948069_rel_snps rel on (reg.id = rel.query_region_id) join elegans_features gene on (rel.target_region_id = gene.id) join elegans_features exon on (exon.attribute = gene.attribute) where exon.feature = 'exon' and gene.feature = 'mRNA' group by reg.id, gene.attribute" # Sun Apr 21 22:31:10 BST 2013: Filtering masked intergenic regions /usr/bin/mysql -Dmaqgenedb -umaqgene --skip-column-names --batch --raw --local-infile -e "delete i.* from 2232948069_rel_intergenic i join 2232948069_masked_ids m using (query_region_id)" touch 2232948069_rel_intergenic # Sun Apr 21 22:31:19 BST 2013: Classifying intergenic relations /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "drop table if exists 2232948069_intergenic_assoc; create table 2232948069_intergenic_assoc ( id INT(10) NOT NULL, target_region_id INT(10) NOT NULL, relation enum ('upstream', 'downstream', 'into') NOT NULL, distance INT(10) NOT NULL, num_regions_between INT(3) NOT NULL, attribute varchar(50) NOT NULL, primary key (id, target_region_id), key (id, attribute) ); insert into 2232948069_intergenic_assoc select snp.id, rel.target_region_id, if (rel.overlap > 0, 'into', if (feature.strand = '+', if (snp.end < feature.start, 'upstream', 'downstream'), if (feature.end < snp.start, 'upstream', 'downstream') ) ) relation, rel.distance, rel.num_regions_between, feature.attribute from 2232948069_snps snp join 2232948069_rel_intergenic rel on (snp.id = rel.query_region_id) join elegans_features feature on (rel.target_region_id = feature.id)" touch 2232948069_intergenic_assoc touch 2232948069_offsets_snps # Sun Apr 21 22:31:42 BST 2013: Getting codons affected by point mutations... /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "drop table if exists 2232948069_codons; create table 2232948069_codons ( id int(10) NOT NULL, gene varchar(50) NOT NULL, ref_codon char(4) NOT NULL, sam_codon char(4) NOT NULL, start char(1) NOT NULL, primary key (id, gene) ); insert into 2232948069_codons select snp.id, off.gene, @ref_codon := substr(gene_dna.sequence, (floor(if(feature.strand = '+', off.5prime_offset, off.5prime_offset - 1 ) / 3 ) * 3 ) + 1, 3 ) ref_codon, insert(@ref_codon, (if(feature.strand = '+', off.5prime_offset, off.5prime_offset - 1) % 3) + 1, 1, if(feature.strand = '+', snp.sample_base, substr('ACGTTGCA', instr('ACGTTGCA', snp.sample_base)+4, 1) ) ) sam_codon, if((feature.strand = '+' && off.5prime_offset < 3) || (feature.strand = '-' && off.5prime_offset - 1 < 3), 'Y', 'N') start from 2232948069_offsets_snps off join 2232948069_snps snp on (off.id = snp.id) join elegans_coding_dna gene_dna using (gene) join elegans_features feature on (gene_dna.gene = feature.attribute and feature.start <= snp.start and feature.end >= snp.end) where snp.variant_type = 'point' and feature.feature = 'exon' and off.boundary_type = 'start' and off.5prime_offset >= 0 and off.3prime_offset >= 0;" touch 2232948069_codons /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "drop table if exists 2232948069_marked; create table 2232948069_marked ( id int(10) NOT NULL, class varchar(100) NOT NULL, description varchar(100) NOT NULL, parent_feature varchar(100) NOT NULL, primary key (id, class, parent_feature) );" # Sun Apr 21 22:31:57 BST 2013: Finding exonic indels ... /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "insert into 2232948069_marked select snp.id, if (snp.indel_size < 0, if (rel.overlap % 3 = 0, 'inframe', 'frameshift'), if (snp.indel_size % 3 = 0, 'inframe', 'frameshift')) class, if (rel.overlap != snp.end - snp.start, 'exon_boundary', 'none') description, feature.attribute parent_feature from 2232948069_snps snp join 2232948069_rel_snps rel on (snp.id = rel.query_region_id) join elegans_features feature on (rel.target_region_id = feature.id) where feature.feature = 'exon' and snp.variant_type = 'indel';" # Sun Apr 21 22:31:58 BST 2013: Finding coding variants ... /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "insert into 2232948069_marked select trans.id, if (reference_codons.amino = sample_codons.amino, 'silent', if (reference_codons.amino3 = 'stop', 'readthrough', if (sample_codons.amino3 = 'stop', 'premature_stop', if (trans.start = 'Y' and sample_codons.is_start != 'Y', 'non_start', 'missense') ) ) ) class, concat(ref_codon,'->',sam_codon,'[', reference_codons.amino3,'->',sample_codons.amino3,']') description, trans.gene from 2232948069_codons trans join elegans_genetic_code reference_codons on (trans.ref_codon = reference_codons.codon) join elegans_genetic_code sample_codons on (trans.sam_codon = sample_codons.codon);" # Sun Apr 21 22:31:58 BST 2013: Finding noncoding variants ... /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "insert into 2232948069_marked select snp.id, feature.feature class, 'none' description, feature.attribute parent_feature from 2232948069_snps snp join 2232948069_rel_snps rel on (snp.id = rel.query_region_id) join elegans_features feature on (rel.target_region_id = feature.id) where feature.feature not in ('exon', 'intron', 'mRNA');" # Sun Apr 21 22:31:59 BST 2013: Finding splice site variants ... /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "insert into 2232948069_marked select snp.id, if (feature.strand = '+', if (snp.start - feature.start < 2, 'splice_donor', 'splice_acceptor'), if (snp.start - feature.start < 2, 'splice_acceptor', 'splice_donor')) class, 'none' description, feature.attribute parent_feature from 2232948069_snps snp join 2232948069_rel_snps rel on (snp.id = rel.query_region_id) join elegans_features feature on (rel.target_region_id = feature.id) where feature.feature = 'intron' and rel.distance > -2;" # Sun Apr 21 22:32:00 BST 2013: Finding intergenic and intronic variants ... /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "insert into 2232948069_marked select id, 'nongenic' class, concat(if (relation in ('upstream','into'), - distance, distance),' ',relation) description, attribute from 2232948069_intergenic_assoc where ((relation = 'upstream' and num_regions_between = 0 and distance < 50000) or (relation = 'upstream' and num_regions_between < 2 and distance < 1000) or (relation = 'downstream' and num_regions_between = 0 and distance < 1000) or (relation = 'downstream' and num_regions_between < 2 and distance < 1000) or relation = 'into' );" # Sun Apr 21 22:32:05 BST 2013: Finding uncovered regions in genes ... /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "insert into 2232948069_marked select b.id, 'uncovered', if(b.5prime_offset = e.5prime_offset, concat('non-exonic, ',unc.start - gene.start,' to ',unc.end - gene.start,' bp into'), concat( if(b.5prime_offset < 0, concat(-b.5prime_offset, ' bp upstream'), if(b.3prime_offset < 0, concat(-b.3prime_offset, ' bp downstream'), concat('codon ',floor(b.5prime_offset / 3) + 1) ) ), ' to ', if(e.3prime_offset < 0, concat(-e.3prime_offset, ' bp downstream'), if(e.5prime_offset < 0, concat(-e.5prime_offset, ' bp upstream'), concat('codon ',ceiling(e.5prime_offset / 3) + 1) ) ), ' (', format(100 * (if(e.5prime_offset < 0,0,e.5prime_offset) - (if(b.5prime_offset < 0,0,b.5prime_offset)) ) / b.total_length,0 ), ' % of ', floor(b.total_length/3), ' codons)' ) ) description, b.gene from 1449392424_uncovered unc join 1449392424_offsets_uncovered b using (id) join 1449392424_offsets_uncovered e using (id, gene) join elegans_features gene on (e.gene = gene.attribute) where b.boundary_type = 'start' and e.boundary_type = 'end' and gene.strand = '+' and gene.feature = 'mRNA' union select b.id, 'uncovered', if(b.5prime_offset = e.5prime_offset, concat('non-exonic, ',gene.end - unc.end,' to ',gene.end - unc.start,' bp into'), concat( if(e.5prime_offset < 0, concat(-e.5prime_offset, ' bp upstream'), if(e.3prime_offset < 0, concat(-e.3prime_offset, ' bp downstream'), concat('codon ',floor(e.5prime_offset / 3) + 1) ) ), ' to ', if(b.3prime_offset < 0, concat(-b.3prime_offset, ' bp downstream'), if(b.5prime_offset < 0, concat(-b.5prime_offset, ' bp upstream'), concat('codon ',ceiling(b.5prime_offset / 3) + 1) ) ), ' (', format(100 * (if(b.5prime_offset < 0,0,b.5prime_offset) - (if(e.5prime_offset < 0,0,e.5prime_offset)) ) / b.total_length,0 ), ' % of ', floor(b.total_length/3), ' codons)' ) ) description, b.gene from 1449392424_uncovered unc join 1449392424_offsets_uncovered b using (id) join 1449392424_offsets_uncovered e using (id, gene) join elegans_features gene on (e.gene = gene.attribute) where b.boundary_type = 'start' and e.boundary_type = 'end' and gene.strand = '-' and gene.feature = 'mRNA'" # Sun Apr 21 22:32:05 BST 2013: Variants found: +-----------------+-----------------+ | class | number_variants | +-----------------+-----------------+ | five_prime_UTR | 1312 | | frameshift | 44 | | inframe | 10 | | missense | 6391 | | ncRNA | 893 | | nongenic | 126984 | | non_start | 9 | | premature_stop | 698 | | readthrough | 20 | | silent | 7516 | | SNP | 1055 | | splice_acceptor | 48 | | splice_donor | 50 | | three_prime_UTR | 3601 | | uncovered | 21 | +-----------------+-----------------+ touch 2232948069_marked # Sun Apr 21 22:32:07 BST 2013: Combining results ... /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "drop table if exists 2232948069_combined; create table 2232948069_combined (primary key (id, class, description), key (dna, start)) as select snp.id, 'CB1122_2' mutant_strain, snp.dna, snp.start + 1 start, snp.end - snp.start length, snp.ref_base reference_base, snp.sample_base sample_base, snp.snp_score consensus_score, snp.loci_multiplicity, snp.mapping_quality, snp.neighbor_quality, (pile.wt_fwd + pile.wt_rev) number_wildtype_reads, (snp.read_depth - (pile.wt_fwd + pile.wt_rev)) number_variant_reads, snp.read_depth sequencing_depth, pile.sample_reads, snp.variant_type, snp.indel_size, class.class, class.description, group_concat(class.parent_feature order by class.parent_feature) parent_features from 2232948069_snps snp join 1449392424_pileup pile using (dna,start) join 2232948069_marked class on (snp.id = class.id) left join elegans_per_locus using (dna,start) where (snp.variant_type = 'point' and (snp.snp_score >= 3 and snp.loci_multiplicity <= 10 and snp.neighbor_quality >= 0 and pile.read_depth >= 4 and ((pile.read_depth - pile.wt_fwd - pile.wt_rev) / pile.read_depth) >= 0.5) or snp.variant_type = 'indel' ) group by id, class, description union select unc.id id, 'CB1122_2' mutant_strain, unc.dna, unc.start + 1 start, unc.end - unc.start length, '-' reference_base, '-' sample_base, 0 consensus_score, 0, 0, 0, 0 number_wildtype_reads, 0 number_variant_reads, 0 sequencing_depth, '' sample_reads, 'uncovered', 0, class.class, class.description, group_concat(class.parent_feature order by class.parent_feature) parent_features from 1449392424_uncovered unc join 2232948069_marked class on (unc.id = class.id) left join elegans_per_locus using (dna,start) group by id, class, description; alter table 2232948069_combined order by dna, start, length" touch 2232948069_combined # Sun Apr 21 22:32:09 BST 2013: Writing results to flat file ... # Sun Apr 21 22:32:09 BST 2013: Writing results to grouped file ... /usr/bin/mysql -Dmaqgenedb -umaqgene --skip-column-names --batch --raw --local-infile --column-names -e "select id, mutant_strain, dna, start, length, reference_base, sample_base, consensus_score, loci_multiplicity, mapping_quality, neighbor_quality, number_wildtype_reads, number_variant_reads, sequencing_depth, sample_reads, variant_type, indel_size, group_concat(class) classes, group_concat(description) descriptions, group_concat(concat('{',parent_features,'}')) parent_features from 2232948069_combined group by id order by dna, start;" > /data/maqgene/out/example_user/CB1122_2/CB1122_2_grouped.txt /usr/bin/mysql -Dmaqgenedb -umaqgene --skip-column-names --batch --raw --local-infile --column-names -e "select * from 2232948069_combined" > /data/maqgene/out/example_user/CB1122_2/CB1122_2_flat.txt 5053 lines written. 2856 lines written. rm 800044356.00003.map 800044356.00001.1.bfq 800044356.00010.1.bfq 800044356.00004.1.fastq 800044356.00006.mismatch 800044356.00016.1.bfq 800044356.00021.mismatch 800044356.00007.unmapped 800044356.00008.mismatch 800044356.00013.map 800044356.00000.mismatch 800044356.00014.1.fastq 800044356.00017.1.bfq 800044356.00002.1.bfq 800044356.00020.1.bfq 800044356.00001.mismatch 800044356.00010.mismatch 800044356.00016.mismatch 800044356.00021.unmapped 800044356.00008.unmapped 800044356.00012.1.bfq 800044356.00023.map 800044356.00024.unmapped 800044356.00019.map 800044356.00024.1.fastq 800044356.00017.mismatch 800044356.00003.1.bfq 800044356.00002.mismatch 800044356.00015.map 800044356.00020.mismatch 800044356.00018.1.bfq 800044356.00006.unmapped 800044356.00022.1.bfq 800044356.00012.mismatch 800044356.00010.map 800044356.00009.unmapped 800044356.00006.1.fastq 800044356.00000.unmapped 800044356.00000.1.fastq 800044356.00001.map 800044356.00001.unmapped 800044356.00010.unmapped 800044356.00015.mismatch 800044356.00013.1.bfq 800044356.00016.unmapped 800044356.00022.mismatch 800044356.00001.1.fastq 800044356.00010.1.fastq 800044356.00016.1.fastq 800044356.00005.map 800044356.00011.unmapped 800044356.00018.mismatch 800044356.00003.mismatch 800044356.00007.1.fastq 800044356.00011.1.fastq 800044356.00017.unmapped 800044356.00011.map 800044356.00005.1.bfq 800044356.00002.unmapped 800044356.00020.unmapped 800044356.00015.1.fastq 800044356.00017.1.fastq 800044356.00014.map 800044356.00023.1.bfq 800044356.00013.mismatch 800044356.00020.1.fastq 800044356.00019.1.bfq 800044356.00012.unmapped 800044356.00014.unmapped 800044356.00002.1.fastq 800044356.00015.1.bfq 800044356.00007.map 800044356.00012.1.fastq 800044356.00021.1.fastq 800044356.00008.1.fastq 800044356.00005.mismatch 800044356.00018.unmapped 800044356.00024.map 800044356.00018.1.fastq 800044356.00019.mismatch 800044356.00009.mismatch 800044356.00022.unmapped 800044356.00022.1.fastq 800044356.00021.map 800044356.00008.map 800044356.00003.unmapped 800044356.00009.1.fastq 800044356.00006.map 800044356.00003.1.fastq 800044356.00004.1.bfq 800044356.00013.unmapped 800044356.00000.map 3538352084_split1 800044356.00013.1.fastq 800044356.00007.1.bfq 800044356.00009.map 800044356.00011.1.bfq 800044356.00014.1.bfq 800044356.00016.map 800044356.00005.unmapped 800044356.00023.mismatch 800044356.00005.1.fastq 800044356.00004.mismatch 800044356.00023.unmapped 800044356.00019.unmapped 800044356.00023.1.fastq 800044356.00017.map 800044356.00021.1.bfq 800044356.00008.1.bfq 800044356.00019.1.fastq 800044356.00002.map 800044356.00004.map 800044356.00020.map 800044356.00015.unmapped 800044356.00014.mismatch 800044356.00024.1.bfq 800044356.00011.mismatch 800044356.00012.map 800044356.00006.1.bfq 800044356.00009.1.bfq 800044356.00018.map 800044356.00007.mismatch 800044356.00024.mismatch 800044356.00000.1.bfq 800044356.00022.map 800044356.00004.unmapped make: Leaving directory `/data/maqgene/work' make: Entering directory `/data/maqgene/work' # Sun Apr 21 22:32:13 BST 2013: Cleaning intermediate data ... /usr/bin/mysql -umaqgene -Dmaqgenedb --local-infile -e "drop table if exists 2232948069_snps,2232948069_rel,2232948069_masked_ids,2232948069_rel_intergenic,2232948069_intergenic_assoc,2232948069_offsets,2232948069_codons,2232948069_marked,2232948069_combined,1449392424_pileup" rm -f 2232948069_snps 2232948069_rel 2232948069_masked_ids 2232948069_rel_intergenic 2232948069_intergenic_assoc 2232948069_offsets 2232948069_codons 2232948069_marked 2232948069_combined 1449392424_pileup make: Leaving directory `/data/maqgene/work'