# for emacs: -*- mode: sh; -*-

# This file describes browser build for the chrPic1
# Western Painted Turtle - Chrysemys picta bellii - Dec 2011

# http://www.ncbi.nlm.nih.gov/genome/12107
# http://www.ncbi.nlm.nih.gov/genome/assembly/326468/
# http://www.ncbi.nlm.nih.gov/bioproject/78657 - WashU
# http://www.ncbi.nlm.nih.gov/Traces/wgs/?val=AHGY01
# 15X coverage WGS

# http://www.ncbi.nlm.nih.gov/bioproject/11857 - chrMt - U. Mich.
# NC_002073.3 - AF069423.1
# http://www.ncbi.nlm.nih.gov/nuccore/NC_002073.3

# DATE: 29-Dec-2011
# ORGANISM: Chrysemys picta bellii
# TAXID: 8478
# ASSEMBLY LONG NAME: Chrysemys_picta_bellii-3.0.1
# ASSEMBLY SHORT NAME: Chrysemys_picta_bellii-3.0.1
# ASSEMBLY SUBMITTER: International Painted Turtle Genome Sequencing Consortium
# ASSEMBLY TYPE: Haploid
# NUMBER OF ASSEMBLY-UNITS: 2
# ASSEMBLY ACCESSION: GCA_000241765.1
# ##Below is a 2 column list with assembly-unit id and name.
# ##The Primary Assembly unit is listed first.
# GCA_000241775.1 Primary Assembly
# GCA_000241805.1 non-nuclear
# FTP-RELEASE DATE: 10-Jan-2012

# Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Coelomata;
# Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata;
# Teleostomi; Euteleostomi; Sarcopterygii; Tetrapoda; Amniota;
# Sauropsida; Testudines; Cryptodira; Testudinoidea; Emydidae;
# Chrysemys; Chrysemys picta

#############################################################################
# Fetch sequence from genbank (DONE - 2012-01-12 - Hiram)

mkdir -p /hive/data/genomes/chrPic1/genbank
cd /hive/data/genomes/chrPic1/genbank

time rsync -a -P rsync://ftp.ncbi.nlm.nih.gov/genbank/genomes/Eukaryotes/vertebrates_other/Chrysemys_picta/Chrysemys_picta_bellii-3.0.1/ ./

faSize Primary_Assembly/unplaced_scaffolds/FASTA/unplaced.scaf.fa.gz
# 2589728838 bases (431455958 N's 2158272880 real 2158272880 upper
# 0 lower) in 80983 sequences in 1 files
# Total size: mean 31978.7 sd 446227.0
# min 501 (gi|370893393|gb|AHGY01540527.1|)
# max 26452378 (gi|371559404|gb|JH584429.1|) median 753

faSize non-nuclear/unlocalized_scaffolds/FASTA/chrMT.unlocalized.scaf.fa.gz
# 18940 bases (0 N's 18940 real 18940 upper 0 lower) in 4 sequences
# in 1 files
# Total size: mean 4735.0 sd 7919.8
# min 563 (gi|370925593|gb|AHGY01508327.1|)
# max 16612 (gi|371051809|gb|AHGY01385006.1|) median 970

# the automatic pickup of the chrMt sequence is not used for this build
# (mitoAcc is "none" in chrPic1.config.ra below), so pick up the sequence
# by hand and include it with the wild card specification of files here:
# NOTE(review): the original wording blamed a fasta name mismatch between
# "Trichechus manatus" and "Trichechus manatus latirostris"; that is the
# manatee, evidently copied from another build doc.  Presumably the same
# kind of species-name mismatch applied to Chrysemys picta bellii - confirm.
wget -O NC_002073.NCBI.fa \
    "http://www.ncbi.nlm.nih.gov/sviewer/viewer.fcgi?db=nuccore&dopt=fasta&sendto=on&id=NC_002073"

# rename the single NCBI record to chrM and write a one-line AGP for it
sed -e 's/^>.*/>chrM/' NC_002073.NCBI.fa > chrM.fa
faCount chrM.fa
#seq len A C G T N cpg
# chrM 16866 5810 4376 2165 4515 0 331
echo "chrM 1 16866 1 O NC_002073 1 16866 +" | tr '[ ]' '[\t]' > chrM.agp

#############################################################################
# process into UCSC naming scheme (DONE - 2012-01-12 - Hiram)
mkdir /hive/data/genomes/chrPic1/ucsc
cd /hive/data/genomes/chrPic1/ucsc

# verify we don't have any .acc numbers different from .1
zcat ../genbank/Primary_Assembly/unplaced_scaffolds/AGP/unplaced.scaf.agp.gz \
    | cut -f1 | egrep "^JH|AHGY" \
    | sed -e 's/^JH[0-9][0-9]*//; s/^AHGY[0-9][0-9]*//' | sort | uniq -c
# 1022441 .1

# this is like the unplaced.pl script in other assemblies except it
# does not add chrUn_ to the names since they are all just scaffolds

cat << '_EOF_' > unplaced.pl
#!/bin/env perl

# Rewrite the GenBank unplaced-scaffold AGP and FASTA with UCSC names:
# the ".1" version suffix is dropped from each scaffold name and the
# FASTA header is reduced to the bare accession.
# Writes unplaced.agp and unplaced.fa in the current directory.

use strict;
use warnings;

my $agpFile = "../genbank/Primary_Assembly/unplaced_scaffolds/AGP/unplaced.scaf.agp.gz";
my $fastaFile = "../genbank/Primary_Assembly/unplaced_scaffolds/FASTA/unplaced.scaf.fa.gz";

# AGP: comment lines pass through untouched; on data lines only the first
# ".1" is removed, which is the version suffix of the object name in
# column 1 (component ids later on the line keep their version).
open (my $agpIn, "-|", "zcat", $agpFile) or die "can not read $agpFile: $!";
open (my $agpOut, ">", "unplaced.agp") or die "can not write to unplaced.agp: $!";
while (my $line = <$agpIn>) {
    if ($line !~ m/^#/) {
        $line =~ s/\.1//;
    }
    print $agpOut $line;
}
# close on a pipe reports a failing zcat, e.g. a truncated .gz
close ($agpIn) or die "zcat failed on $agpFile";
close ($agpOut) or die "can not finish writing unplaced.agp: $!";

# FASTA: ">gi|NNN|gb|ACC.1|..." becomes ">ACC"; sequence lines are copied.
open (my $faIn, "-|", "zcat", $fastaFile) or die "can not read $fastaFile: $!";
open (my $faOut, ">", "unplaced.fa") or die "can not write to unplaced.fa: $!";
while (my $line = <$faIn>) {
    if ($line =~ m/^>/) {
        chomp $line;
        $line =~ s/.*gb\|//;
        $line =~ s/\.1\|.*//;
        # print, not printf: the name must never be used as a format string
        print $faOut ">$line\n";
    } else {
        print $faOut $line;
    }
}
close ($faIn) or die "zcat failed on $fastaFile";
close ($faOut) or die "can not finish writing unplaced.fa: $!";
'_EOF_'
# << happy emacs
chmod +x unplaced.pl
time ./unplaced.pl
# real 1m4.707s
# -rw-rw-r-- 1 52300039 Jan 12 16:03 unplaced.agp
# -rw-rw-r-- 1 2627874889 Jan 12 16:03 unplaced.fa

time gzip *.fa *.agp

# verify nothing lost in the translation, should be the same as above
# except for the name translations
faSize *.fa.gz
# 2589728838 bases (431455958 N's 2158272880 real 2158272880 upper 0 lower)
# in 80983 sequences in 1 files
# Total size: mean 31978.7 sd 446227.0 min 501 (AHGY01540527)
# max 26452378 (JH584429) median 753

# chrM was fetched by hand into ../genbank (see the fetch section above),
# so include it with the wild card specification of files here:
cat ../genbank/chrM.fa | gzip -c > chrM.fa.gz
cat ../genbank/chrM.agp | gzip -c > chrM.agp.gz

#############################################################################
# Initial browser build (DONE - 2012-03-27 - Hiram)
cd /hive/data/genomes/chrPic1
cat << '_EOF_' > chrPic1.config.ra
# Config parameters for makeGenomeDb.pl:
db chrPic1
clade vertebrate
genomeCladePriority 50
scientificName Chrysemys picta bellii
commonName Painted Turtle
assemblyDate Dec. 2011
assemblyLabel International Painted Turtle Genome Sequencing Consortium (GCA_000241765.1)
assemblyShortLabel v3.0.1
ncbiAssemblyName Chrysemys_picta_bellii-3.0.1
ncbiAssemblyId 326468
orderKey 4000
# chrM already included
mitoAcc none
fastaFiles /hive/data/genomes/chrPic1/ucsc/*.fa.gz
agpFiles /hive/data/genomes/chrPic1/ucsc/*.agp.gz
dbDbSpeciesDir turtle
taxId 8478
'_EOF_'
# << happy emacs

time makeGenomeDb.pl -workhorse=hgwdev -stop=agp chrPic1.config.ra \
    > agp.log 2>&1
# real 2m2.893s
# check the end of agp.log to verify it is OK
time makeGenomeDb.pl -workhorse=hgwdev -fileServer=hgwdev \
    -continue=db chrPic1.config.ra > db.log 2>&1
# real 17m30.996s

#############################################################################
# running repeat masker (DONE - 2012-03-27 - Hiram)
mkdir /hive/data/genomes/chrPic1/bed/repeatMasker
cd /hive/data/genomes/chrPic1/bed/repeatMasker
time doRepeatMasker.pl -buildDir=`pwd` -noSplit \
    -bigClusterHub=swarm -dbHost=hgwdev -workhorse=hgwdev \
    -smallClusterHub=encodek chrPic1 > do.log 2>&1 &
# real 252m12.216s
cat faSize.rmsk.txt
# 2589745704 bases (431455958 N's 2158289746 real 2088449309 upper
# 69840437 lower) in 80984 sequences in 1 files
# Total size: mean 31978.5 sd 446224.3 min 501 (AHGY01540527)
# max 26452378 (JH584429) median 753
# %2.70 masked total, %3.24 masked real

grep -i versi do.log
# RepeatMasker version development-$Id: RepeatMasker,v 1.26 2011/09/26 16:19:44 angie Exp $
# April 26 2011 (open-3-3-0) version of RepeatMasker

featureBits -countGaps chrPic1 rmsk
# 70001996 bases of 2589745704 (2.703%) in intersection
# why is it different than the faSize above ?
# because rmsk masks out some N's as well as bases, this featureBits count
# separates out the N's from the bases, it doesn't show lower case N's

##########################################################################
# running simple repeat (DONE - 2012-03-27 - Hiram)
mkdir /hive/data/genomes/chrPic1/bed/simpleRepeat
cd /hive/data/genomes/chrPic1/bed/simpleRepeat
time doSimpleRepeat.pl -buildDir=`pwd` -bigClusterHub=swarm \
    -dbHost=hgwdev -workhorse=hgwdev -smallClusterHub=memk \
    chrPic1 > do.log 2>&1 &
# real 43m42.360s
cat fb.simpleRepeat
# 21563230 bases of 2158293365 (0.999%) in intersection

# not going to add to rmsk here, using the window masker instead since
# it masks more sequence
# NOTE(review): the rmsk+TRF commands below contradict the comment above,
# and the recorded faSize result (2608572064 bases, 2685 sequences,
# AGCE*/JH378105 names) does not match chrPic1 (2589745704 bases in
# 80984 sequences); it looks carried over from another assembly's doc.
# chrPic1.2bit and the /gbdb symlink are rebuilt later in the
# "MASK SEQUENCE WITH WM+TRF" section - confirm whether this step was run.
cd /hive/data/genomes/chrPic1
twoBitMask chrPic1.rmsk.2bit \
    -add bed/simpleRepeat/trfMask.bed chrPic1.2bit
# you can safely ignore the warning about fields >= 13

twoBitToFa chrPic1.2bit stdout | faSize stdin > faSize.chrPic1.2bit.txt
cat faSize.chrPic1.2bit.txt
# 2608572064 bases (131440969 N's 2477131095 real 1320629270 upper
# 1156501825 lower) in 2685 sequences in 1 files
# Total size: mean 971535.2 sd 4827933.6 min 1003 (AGCE01151413)
# max 72162052 (JH378105) median 1553

rm /gbdb/chrPic1/chrPic1.2bit
ln -s `pwd`/chrPic1.2bit /gbdb/chrPic1/chrPic1.2bit

#########################################################################
# Verify all gaps are marked, add any N's not in gap as type 'other'
# (DONE - 2012-03-27 - Hiram)
mkdir /hive/data/genomes/chrPic1/bed/gap
cd /hive/data/genomes/chrPic1/bed/gap
# findMotif at -verbose=4 reports every run of N's as a "#GAP " line
time nice -n +19 findMotif -motif=gattaca -verbose=4 \
    -strand=+ ../../chrPic1.unmasked.2bit > findMotif.txt 2>&1
# real 0m23.801s
grep "^#GAP " findMotif.txt | sed -e "s/^#GAP //" > allGaps.bed
time featureBits -countGaps chrPic1 -not gap -bed=notGap.bed
# 2158293365 bases of 2589745704 (83.340%) in intersection
# real 0m21.340s
# N runs that are not already in the gap table end up in new.gaps.bed
time featureBits -countGaps chrPic1 allGaps.bed notGap.bed -bed=new.gaps.bed
# 3619 bases of 2589745704 (0.000%) in intersection
# real 216m26.278s
# what is the highest index in the existing gap table:
hgsql -N -e "select ix from gap;" chrPic1 | sort -n | tail -1
# 8964
cat << '_EOF_' > mkGap.pl
#!/bin/env perl

# Turn the N runs in new.gaps.bed into gap-table rows of type 'other',
# bridged 'yes', numbering them onward from the highest ix already in the
# chrPic1 gap table.  Rows are written to stdout.
# NOTE(review): the input file name was restored from context (the open()
# line was garbled); new.gaps.bed is what the preceding featureBits step
# wrote - confirm.

use strict;
use warnings;

my $ix=`hgsql -N -e "select ix from gap;" chrPic1 | sort -n | tail -1`;
chomp $ix;

open (my $gapsIn, "<", "new.gaps.bed") or die "can not read new.gaps.bed: $!";
while (my $line = <$gapsIn>) {
    my ($chrom, $chromStart, $chromEnd) = split('\s+', $line);
    ++$ix;
    # columns: chrom, chromStart, chromEnd, ix, N, size, type, bridge
    printf "%s\t%d\t%d\t%d\tN\t%d\tother\tyes\n", $chrom,
        $chromStart, $chromEnd, $ix, $chromEnd-$chromStart;
}
close ($gapsIn);
'_EOF_'
# << happy emacs
chmod +x ./mkGap.pl
./mkGap.pl > other.bed
featureBits -countGaps chrPic1 other.bed
# 3619 bases of 2589745704 (0.000%) in intersection
wc -l other.bed
# 3589
# -noLoad only writes bed.tab, which is appended to the gap table below
hgLoadBed -sqlTable=$HOME/kent/src/hg/lib/gap.sql \
    -noLoad chrPic1 otherGap other.bed
# Read 3589 elements of size 8 from other.bed
# starting with this many
hgsql -e "select count(*) from gap;" chrPic1
# 470729
hgsql chrPic1 -e 'load data local infile "bed.tab" into table gap;'
# result count:
hgsql -e "select count(*) from gap;" chrPic1
# 474318
# == 470729 + 3589

# verify we aren't adding gaps where gaps already exist
# this would output errors if that were true:
gapToLift -minGap=1 chrPic1 nonBridged.lift -bedFile=nonBridged.bed
# see example in danRer7.txt
# there are no non-bridged gaps here:
hgsql -N -e "select bridge from gap;" chrPic1 | sort | uniq -c
# 474318 yes

##########################################################################
## WINDOWMASKER (DONE - 2012-03-27 - Hiram)
mkdir /hive/data/genomes/chrPic1/bed/windowMasker
cd /hive/data/genomes/chrPic1/bed/windowMasker
time nice -n +19 doWindowMasker.pl -buildDir=`pwd` -workhorse=hgwdev \
    -dbHost=hgwdev chrPic1 > do.log 2>&1 &
# real 225m32.368s

# Masking statistics
# NOTE(review): the two faSize reports below (2860591921 bases, 22819
# sequences, AFYH*/JH126562 names) do not match chrPic1 (2589745704 bases
# in 80984 sequences) and look carried over from another assembly's doc;
# the chrPic1.cleanWMSdust numbers further down are consistent.
twoBitToFa chrPic1.wmsk.2bit stdout | faSize stdin
# 2860591921 bases (676999153 N's 2183592768 real 1197030078 upper
# 986562690 lower) in 22819 sequences in 1 files
# Total size: mean 125360.1 sd 395781.0 min 1000 (AFYH01291818)
# max 10736886 (JH126562) median 5475
# %34.49 masked total, %45.18 masked real

twoBitToFa chrPic1.wmsk.sdust.2bit stdout | faSize stdin
# 2860591921 bases (676999153 N's 2183592768 real 1186196719 upper
# 997396049 lower) in 22819 sequences in 1 files
# Total size: mean 125360.1 sd 395781.0 min 1000 (AFYH01291818)
# max 10736886 (JH126562) median 5475
# %34.87 masked total, %45.68 masked real

hgLoadBed chrPic1 windowmaskerSdust windowmasker.sdust.bed.gz
# Read 11886030 elements of size 3 from windowmasker.sdust.bed.gz

featureBits -countGaps chrPic1 windowmaskerSdust
# 1104610436 bases of 2589745704 (42.653%) in intersection

# eliminate the gaps from the masking
featureBits chrPic1 -not gap -bed=notGap.bed
# 2158289746 bases of 2158289746 (100.000%) in intersection
time nice -n +19 featureBits chrPic1 windowmaskerSdust notGap.bed \
    -bed=stdout | gzip -c > cleanWMask.bed.gz
# 673156757 bases of 2158289746 (31.189%) in intersection
# real 109m11.237s

# reload track to get it clean
hgLoadBed chrPic1 windowmaskerSdust cleanWMask.bed.gz
# Read 11868504 elements of size 4 from cleanWMask.bed.gz
time featureBits -countGaps chrPic1 windowmaskerSdust
# 673156757 bases of 2589745704 (25.993%) in intersection
# real 1m39.393s

# mask with this clean result
zcat cleanWMask.bed.gz \
    | twoBitMask ../../chrPic1.unmasked.2bit stdin \
        -type=.bed chrPic1.cleanWMSdust.2bit
twoBitToFa chrPic1.cleanWMSdust.2bit stdout | faSize stdin \
    > chrPic1.cleanWMSdust.faSize.txt
cat chrPic1.cleanWMSdust.faSize.txt
# 2589745704 bases (431455958 N's 2158289746 real 1485132989 upper
# 673156757 lower) in 80984 sequences in 1 files
# Total size: mean 31978.5 sd 446224.3 min 501 (AHGY01540527)
# max 26452378 (JH584429) median 753
# %25.99 masked total, %31.19 masked real

# how much does this window masker and repeat masker overlap:
featureBits -countGaps chrPic1 rmsk windowmaskerSdust
# 42492356 bases of 2589745704 (1.641%) in intersection
# real 2m15.424s

#########################################################################
# MASK SEQUENCE WITH WM+TRF (DONE - 2012-03-29 - Hiram)
cd /hive/data/genomes/chrPic1
twoBitMask -add bed/windowMasker/chrPic1.cleanWMSdust.2bit \
    bed/simpleRepeat/trfMask.bed chrPic1.2bit
# safe to ignore the warnings about BED file with >=13 fields
twoBitToFa chrPic1.2bit stdout | faSize stdin > faSize.chrPic1.txt
cat faSize.chrPic1.txt
# 2589745704 bases (431455958 N's 2158289746 real 1485006823 upper
# 673282923 lower) in 80984 sequences in 1 files
# Total size: mean 31978.5 sd 446224.3 min 501 (AHGY01540527)
# max 26452378 (JH584429) median 753
# %26.00 masked total, %31.20 masked real

# create symlink to gbdb
ssh hgwdev
rm /gbdb/chrPic1/chrPic1.2bit
ln -s `pwd`/chrPic1.2bit /gbdb/chrPic1/chrPic1.2bit

#########################################################################
# cpgIslands - (DONE - 2012-04-24 - Hiram)
# NOTE(review): this was recorded as 2011-04-24, which predates the
# 2012-01-12 sequence fetch; the year is presumably 2012 - confirm.
mkdir /hive/data/genomes/chrPic1/bed/cpgIslands
cd /hive/data/genomes/chrPic1/bed/cpgIslands
time doCpgIslands.pl chrPic1 > do.log 2>&1
# real 187m22.286s

cat fb.chrPic1.cpgIslandExt.txt
# 15937502 bases of 2158289746 (0.738%) in intersection

#########################################################################
# genscan - (DONE - 2012-04-26 - Hiram)
# NOTE(review): this was recorded as 2011-04-26, which predates the
# 2012-01-12 sequence fetch; the year is presumably 2012 - confirm.
mkdir /hive/data/genomes/chrPic1/bed/genscan
cd /hive/data/genomes/chrPic1/bed/genscan
time doGenscan.pl chrPic1 > do.log 2>&1
# one failed job:
#   ./runGsBig.csh JH584641 000 gtf/000/JH584641.gtf pep/000/JH584641.pep subopt/000/JH584641.bed
# rerunning with window size of 200000
# real 0m45.659s

# continuing:
time doGenscan.pl -continue=makeBed chrPic1 > makeBed.log 2>&1
# real 73m21.769s

cat fb.chrPic1.genscan.txt
# 41301989 bases of 2158289746 (1.914%) in intersection
cat fb.chrPic1.genscanSubopt.txt
# 50395023 bases of 2158289746 (2.335%) in intersection

#########################################################################
# MAKE 11.OOC FILE FOR BLAT/GENBANK (DONE - 2012-05-03 - Hiram)
# Use -repMatch=800, based on size -- for human we use 1024
# use the "real" number from the faSize measurement,
# hg19 is 2897316137, calculate the ratio factor for 1024:
calc \( 2158272880 / 2897316137 \) \* 1024
# ( 2158272880 / 2897316137 ) * 1024 = 762.799544

# round up to 800
cd /hive/data/genomes/chrPic1
time blat chrPic1.2bit /dev/null /dev/null -tileSize=11 \
    -makeOoc=jkStuff/chrPic1.11.ooc -repMatch=800
# Wrote 13782 overused 11-mers to jkStuff/chrPic1.11.ooc
# real 0m58.297s

# there are no non-bridged gaps, no lift file needed for genbank
hgsql -N -e "select bridge from gap;" chrPic1 | sort | uniq -c
# 474318 yes
# cd /hive/data/genomes/chrPic1/jkStuff
# gapToLift chrPic1 chrPic1.nonBridged.lift -bedFile=chrPic1.nonBridged.bed
# largest non-bridged contig:
# awk '{print $3-$2,$0}' chrPic1.nonBridged.bed | sort -nr | head
# 123773608 chrX 95534 123869142 chrX.01

#########################################################################
# AUTO UPDATE GENBANK (DONE - 2012-05-03 - Hiram)
# examine the file: /cluster/data/genbank/data/organism.lst
# for your species to see what counts it has for:
# organism mrnaCnt estCnt refSeqCnt
# Choloepus hoffmanni 7 0 0
# to decide which "native" mrna or ests you want to specify in genbank.conf

ssh hgwdev
cd $HOME/kent/src/hg/makeDb/genbank
git pull
# edit etc/genbank.conf to add chrPic1 just after ce2
# chrPic1 (painted turtle)
chrPic1.serverGenome = /hive/data/genomes/chrPic1/chrPic1.2bit
chrPic1.clusterGenome = /hive/data/genomes/chrPic1/chrPic1.2bit
chrPic1.ooc = /hive/data/genomes/chrPic1/jkStuff/chrPic1.11.ooc
chrPic1.lift = no
chrPic1.refseq.mrna.native.pslCDnaFilter = ${lowCover.refseq.mrna.native.pslCDnaFilter}
chrPic1.refseq.mrna.xeno.pslCDnaFilter = ${lowCover.refseq.mrna.xeno.pslCDnaFilter}
chrPic1.genbank.mrna.native.pslCDnaFilter = ${lowCover.genbank.mrna.native.pslCDnaFilter}
chrPic1.genbank.mrna.xeno.pslCDnaFilter = ${lowCover.genbank.mrna.xeno.pslCDnaFilter}
chrPic1.genbank.est.native.pslCDnaFilter = ${lowCover.genbank.est.native.pslCDnaFilter}
chrPic1.refseq.mrna.native.load = no
chrPic1.refseq.mrna.xeno.load = yes
chrPic1.genbank.mrna.xeno.load = yes chrPic1.genbank.est.native.load = no chrPic1.downloadDir = chrPic1 chrPic1.perChromTables = no # end of section added to etc/genbank.conf git commit -m "adding chrPic1 painted turtle" etc/genbank.conf git push make etc-update git pull # Edit src/lib/gbGenome.c to add new species. git commit -m "adding definition for chrPicNames Chrysemys picta bellii" \ src/lib/gbGenome.c # Western Painted Turtle - Chrysemys picta bellii - Dec 2011 git push make install-server ssh hgwdev # used to do this on "genbank" machine screen -S chrPic1 # long running job managed in screen cd /cluster/data/genbank time nice -n +19 ./bin/gbAlignStep -initial chrPic1 & # var/build/logs/2012.05.03-15:12:04.chrPic1.initalign.log # real 2418m11.414s # load database when finished ssh hgwdev cd /cluster/data/genbank time nice -n +19 ./bin/gbDbLoadStep -drop -initialLoad chrPic1 & # logFile: var/dbload/hgwdev/logs/2012.05.08-13:23:32.dbload.log # real 42m38.307s # enable daily alignment and update of hgwdev (DONE - 2012-02-09 - Hiram) cd ~/kent/src/hg/makeDb/genbank git pull # add chrPic1 to: vi etc/align.dbs etc/hgwdev.dbs git commit -m "Added chrPic1." 
etc/align.dbs etc/hgwdev.dbs git push make etc-update ######################################################################### # create pushQ entry (DONE - 2012-05-24 - Hiram) # first make sure all.joiner is up to date and has this new organism # a keys check should be clean: cd ~/kent/src/hg/makeDb/schema joinerCheck -database=chrPic1 -keys all.joiner mkdir /hive/data/genomes/chrPic1/pushQ cd /hive/data/genomes/chrPic1/pushQ makePushQSql.pl chrPic1 > chrPic1.sql 2> stderr.out # check stderr.out for no significant problems, it is common to see: # WARNING: hgwdev does not have /gbdb/chrPic1/wib/gc5Base.wib # WARNING: hgwdev does not have /gbdb/chrPic1/wib/quality.wib # WARNING: hgwdev does not have /gbdb/chrPic1/bbi/quality.bw # WARNING: chrPic1 does not have seq # WARNING: chrPic1 does not have extFile # which are no real problem # if some tables are not identified: # WARNING: Could not tell (from trackDb, all.joiner and hardcoded lists of # supporting and genbank tables) which tracks to assign these tables to: # # put them in manually after loading the pushQ entry scp -p chrPic1.sql hgwbeta:/tmp ssh hgwbeta cd /tmp hgsql qapushq < chrPic1.sql ######################################################################### # fetch a photo, found in Wiki Commons (DONE - 2012-06-07 - Hiram) mkdir /hive/data/genomes/chrPic1/photo cd /hive/data/genomes/chrPic1/photo wget --timestamping \ http://upload.wikimedia.org/wikipedia/commons/4/47/A4_Western_painted_turtle.jpg convert -quality 80 -geometry "350x200" A4_Western_painted_turtle.jpg \ Chrysemys_picta_bellii.jpg # original source URL: http://digitalmedia.fws.gov/cdm4/item_viewer.php?CISOROOT=/natdiglib&CISOPTR=2894&CISOBOX=1&REC=4 # in the public domain courtesy of: # U.S. 
Fish & Wildlife Service ######################################################################### # fixup search rule for assembly track/gold table (DONE - 2012-06-07 - Hiram) hgsql -N -e "select frag from gold;" chrPic1 | sort | head -1 AHGY01000001.1 hgsql -N -e "select frag from gold;" chrPic1 | sort | tail -2 AHGY01551716.1 NC_002073 # hence, add to trackDb/turtle/trackDb.ra searchTable gold shortCircuit 1 termRegex [AN][HC][G_][Y0]0[0-9]+(\.1)? query select chrom,chromStart,chromEnd,frag from %s where frag like '%s%%' searchPriority 8 #########################################################################