# for emacs: -*- mode: sh; -*- # Macaca Mulatta (rheMac1) # Mmul_0.1(January 10, 2005) # contigs and scaffolds, no chromosomes # Mmul_0.1 is a preliminary assembly of the rhesus monkey, Macaca # mulatta using whole genome shotgun (WGS) reads from small and medium # insert clones. # ftp://ftp.hgsc.bcm.tmc.edu/pub/data/Rmacaque/fasta/ # anonymous ftp, no login/password needed # DOWNLOAD SEQUENCE (DONE March 9, 2005 Robert) ssh kksilo mkdir /cluster/store10/rheMac1 cd /cluster/data ln -s /cluster/store10/rheMac1 rheMac1 cd /cluster/data/rheMac1 mkdir downloads cd downloads mkdir contigs cd contigs wget ftp://ftp.hgsc.bcm.tmc.edu/pub/data/Rmacaque/fasta/contigs/Mmul20041001.agp wget ftp://ftp.hgsc.bcm.tmc.edu/pub/data/Rmacaque/fasta/contigs/Mmul20041001.fa.qual.gz wget ftp://ftp.hgsc.bcm.tmc.edu/pub/data/Rmacaque/fasta/contigs/Mmul20041001.fa.gz cd .. mkdir linearScaffolds cd linearScaffolds wget ftp://ftp.hgsc.bcm.tmc.edu/pub/data/Rmacaque/fasta/linearScaffolds/Rmac20041001-freeze-assembly.fa.gz wget ftp://ftp.hgsc.bcm.tmc.edu/pub/data/Rmacaque/fasta/linearScaffolds/Rmac20041001-freeze-assembly.fa.qual.gz cd .. mkdir repeat-reads cd repeat-reads wget ftp://ftp.hgsc.bcm.tmc.edu/pub/data/Rmacaque/fasta/repeats/Mmul20041001.reptigs.fa.gz wget ftp://ftp.hgsc.bcm.tmc.edu/pub/data/Rmacaque/fasta/repeats/Mmul20041001.reptigs.fa.qual.gz cd .. 
# GUNZIP gunzip */*.gz # CHECK FILES # Check that IDs in .fa match IDs in .qual ssh kksilo cd /cluster/data/rheMac1/downloads # foreach Bin0 contigs linearScaffolds repeat-reads grep ">" *.fa > id.fa grep ">" *.qual > id.qual diff id.fa id.qual /cluster/bin/i386/checkAgpAndFa contigs/Mmul20041001.agp linearScaffolds/Rmac20041001-freeze-assembly.fa >& check.out # SPLIT SCAFFOLDS ssh kksilo cd /cluster/data/rheMac1 mkdir scaffolds nice /cluster/bin/i386/faSplit byname downloads/linearScaffolds/Rmac20041001-freeze-assembly.fa scaffolds/ -outDirDepth=3 # generate list find scaffolds -print > scaffolds.list.0 grep fa scaffolds.list.0 > scaffolds.list # MAKE 2BIT NIB FILE ssh kksilo cd /cluster/data/rheMac1/downloads/linearScaffolds nice /cluster/bin/i386/faToTwoBit Rmac20041001-freeze-assembly.fa rheMac1.2bit nice /cluster/bin/i386/twoBitToFa rheMac1.2bit check.fa mv rheMac1.2bit ../.. diff -q Rmac20041001-freeze-assembly.fa check.fa # could also use cmp # note: size match ssh hgwdev mkdir /gbdb/rheMac1 mkdir /gbdb/rheMac1/nib # could get rid of the nib in this path, 2bit isn't a nib (4bit) ln -s /cluster/data/rheMac1/rheMac1.2bit /gbdb/rheMac1/nib # CREATE DATABASE (DONE March 9, 2005 Robert) ssh hgwdev hgsql hg17 create database rheMac1; use rheMac1; create table grp (PRIMARY KEY(NAME)) select * from hg17.grp; # add rows to hgcentraltest on hgwbeta in dbDb and defaultDb # set orderKey past dog, before chimp # CREATE GAP TABLE ssh hgwdev cd kent/src/hg/lib # remove bin column for now hgsql rheMac1 < gap2.sql cd /cluster/data/rheMac1/downloads/contigs grep fragment Mmul20041001.agp > fragment.txt hgsql rheMac1 load data local infile 'fragment.txt' into table gap # create trackDb/monkey/rheMac1/gap.html # CREATE CONTIG TABLE ssh hgwdev cd kent/src/hg/lib # this doesn't include strand or contig start/end hgsql rheMac1 < ctgPos2.sql cd /cluster/data/rheMac1/downloads/contigs grep Contig Mmul20041001.agp > Contig.out ssh kksilo cd /cluster/data/rheMac1/downloads/contigs 
#where is getContig.pl?? # getContig.pl < Contig.out > ctgPos2 # ssh hgwdev # load data local infile 'ctgPos2' into table ctgPos2 # CREATE CHROMINFO TABLE # an improvement here would be to have getMaxCoord output the 2bit file name ssh hgwdev cd /cluster/data/rheMac1 # get coordinates from agp and put in database table getcoords.pl < downloads/contigs/Mmul20041001.agp > scaffolds.coords hgsql rheMac1 < /cluster/data/bosTau1/coords.sql load data local infile 'scaffolds.coords' into table scaffoldCoords faSize downloads/linearScaffolds/Rmac20041001-freeze-assembly.fa -detailed=on > chrom.sizes # calculate maximum coords; check that start coord is always 1 ## /cluster/home/heather/bin/i386/GetMaxCoord > getMaxCoord.rheMac1 edit chromInfo.sql; allow fileName to be null cp ~baertsch/kent/src/hg/lib/chromInfo.sql . hgsql rheMac1 < chromInfo.sql load data local infile 'chrom.sizes' into table chromInfo update chromInfo set fileName = "/gbdb/rheMac1/nib/rheMac1.2bit" # LOAD GAP & GOLD TABLES FROM AGP ssh hgwdev hgGoldGapGl -noGl rheMac1 /cluster/data/rheMac1/downloads/contigs/Mmul20041001.agp # REPEATMASKER # using the split scaffold fa files generated earlier # note the version of RepeatMasker in use; will want this for download README cat /cluster/bluearc/RepeatMasker/Libraries/version # RepBase Update 9.04, RM database version 20040702 # do a trial run ssh hgwdev cd /cluster/data/rheMac1/scaffolds/0/0/0 /cluster/bluearc/RepeatMasker/RepeatMasker -ali -s -spec macaca SCAFFOLD1000.fa # configure cd /cluster/data/rheMac1 mkdir jkStuff cp /cluster/data/anoGam1/jkStuff/RMAnopheles jkStuff/RMRhesus # change references anoGam1 --> rheMac1 mkdir RMRun # /bin/csh makeJoblist-RM.csh cd scaffolds foreach i (0 1 2 3 4 5 6 7 8 9) cd $i foreach j (0 1 2 3 4 5 6 7 8 9) cd $j foreach k (0 1 2 3 4 5 6 7 8 9) cd $k foreach f (*.fa) echo /cluster/data/rheMac1/jkStuff/RMRhesus \ /cluster/data/rheMac1/scaffolds/$i/$j/$k $f \ '{'check out line+ 
/cluster/data/rheMac1/scaffolds/$i/$j/$k/$f.out'}' \ >> /cluster/data/rheMac1/RMRun/RMJobs.2 end cd .. end cd .. end cd .. end # do the run (DONE March 11, 2005 Robert) ssh kk cd /cluster/data/rheMac1/RMRun para create RMJobs.2 para try para push # concatenate into one output file; took about 90 minutes ssh kksilo cd /cluster/data/rheMac1 mkdir repeats /bin/tcsh concatRM.csh cd repeats # use grep -v to get rid of all the headers # then, add back in an initial header # this is so hgLoadOut is happy grep SCAFFOLD repeats.all > repeats.clean cat repeats.header repeats.clean > repeats.final grep "There were no repetitive sequences found" repeats.final > repeats.notfound grep -v "There were no repetitive sequences found" repeats.final > repeats.out ssh hgwdev hgLoadOut rheMac1 repeats.out # Strange perc. fields: Processing repeats.out Strange perc. field -0.1 line 647418 of repeats.out Strange perc. field -20.5 line 972234 of repeats.out Strange perc. field -42.4 line 972234 of repeats.out Strange perc. field -0.6 line 972234 of repeats.out Strange perc. field -6.1 line 1121046 of repeats.out Strange perc. field -1.2 line 1349524 of repeats.out Strange perc. field -4.5 line 1418655 of repeats.out Strange perc. field -0.5 line 1527820 of repeats.out Strange perc. field -4.6 line 1826806 of repeats.out Strange perc. field -1.2 line 1826806 of repeats.out Strange perc. field -0.3 line 1992385 of repeats.out Strange perc. field -5.6 line 2168548 of repeats.out Strange perc. field -89.9 line 2783366 of repeats.out Strange perc. field -18.2 line 2783366 of repeats.out Strange perc. field -72.8 line 2783366 of repeats.out Strange perc. field -1.0 line 2892969 of repeats.out Strange perc. field -2.7 line 2892969 of repeats.out Strange perc. field -5.5 line 2963499 of repeats.out Strange perc. field -28.4 line 3041043 of repeats.out Strange perc. field -9.1 line 3041043 of repeats.out Strange perc. field -0.4 line 3167171 of repeats.out Strange perc. 
field -0.2 line 3167171 of repeats.out Strange perc. field -0.5 line 3467859 of repeats.out Strange perc. field -0.1 line 3487882 of repeats.out Strange perc. field -4.6 line 3637051 of repeats.out Strange perc. field -1.2 line 3637051 of repeats.out Strange perc. field -2.1 line 4155697 of repeats.out Strange perc. field -2.4 line 4155697 of repeats.out Strange perc. field -3.5 line 4423839 of repeats.out Strange perc. field -0.6 line 4781433 of repeats.out note: 883 records dropped due to repStart > repEnd hgsql rheMac1 rename table repeats_rmsk to rmsk # select count(*) from rmsk; # select count(*) from rmsk where repName like "Bov%"; # SIMPLE REPEATS # put the results throughout the scaffold 0/0/0 directories, # same as RepeatMasker, to avoid too many files in the same directory ssh kksilo cd /cluster/data/rheMac1 mkdir bed cd bed mkdir simpleRepeat cd simpleRepeat /bin/csh makeJoblist-trf.csh tcsh trf-run.csh >&! trf.log & # concatenate into one output file; took about an hour /bin/tcsh concatTRF.csh # load /cluster/bin/i386/hgLoadBed rheMac1 simpleRepeat trf.bed \ -sqlTable=/cluster/home/heather/kent/src/hg/lib/simpleRepeat.sql #Reading trf.bed #Loaded 628275 elements of size 16 #Sorted #Saving bed.tab #Loading rheMac1 create index chrom_2 on simpleRepeat (chrom(16), chromStart); create index chrom_3 on simpleRepeat (chrom(16), chromEnd); # CREATE MASKED FA USING REPEATMASKER AND FILTERED TRF FILES ssh kksilo cd /cluster/data/rheMac1 /cluster/bin/i386/maskOutFa -soft downloads/linearScaffolds/Rmac20041001-freeze-assembly.fa repeats/repeats.out rheMac1.softmask.fa WARNING: negative rEnd: -364 SCAFFOLD25002:50185-50335 L1MCc WARNING: negative rEnd: -17 SCAFFOLD65002:22962-23303 L1MCc WARNING: negative rEnd: -168 SCAFFOLD15011:216786-217104 L1MEd WARNING: negative rEnd: -63 SCAFFOLD15011:218103-218302 L1MEd WARNING: negative rEnd: -58 SCAFFOLD30017:67006-67483 L1ME3A WARNING: negative rEnd: -184 SCAFFOLD70018:124976-125294 L1MDa WARNING: negative rEnd: -243 
SCAFFOLD30019:12475-12710 L1MC WARNING: negative rEnd: -559 SCAFFOLD90026:6083-6297 L1M3e WARNING: negative rEnd: -772 SCAFFOLD65027:182872-183028 L1ME2 WARNING: negative rEnd: -38 SCAFFOLD50029:73156-74338 L1MEd WARNING: negative rEnd: -206 SCAFFOLD125032:291298-291342 L1PB2 WARNING: negative rEnd: -426 SCAFFOLD80032:171209-171503 L1M2c WARNING: negative rEnd: -221 SCAFFOLD65033:142196-142540 L1MCc WARNING: negative rEnd: -189 SCAFFOLD80035:127722-128125 L1MCc WARNING: negative rEnd: -37 SCAFFOLD91036:35051-35595 L1M4b WARNING: negative rEnd: -1042 SCAFFOLD61039:14634-14836 L1MEb WARNING: negative rEnd: -610 SCAFFOLD100041:45441-45539 L1P4d WARNING: negative rEnd: -12 SCAFFOLD61046:8669-8902 L1M4b WARNING: negative rEnd: -3 SCAFFOLD54:56518-56566 FRAM WARNING: negative rEnd: -161 SCAFFOLD100056:7086-7696 L1M2a WARNING: negative rEnd: -1175 SCAFFOLD75057:117403-117585 L1MEc WARNING: negative rEnd: -195 SCAFFOLD75057:118016-118986 L1MEc WARNING: negative rEnd: -95 SCAFFOLD70059:42627-42663 L1M3b WARNING: negative rEnd: -143 SCAFFOLD80068:10427-10703 L1MCc WARNING: negative rEnd: -32 SCAFFOLD15069:109411-109499 L1PA10 WARNING: negative rEnd: -465 SCAFFOLD95070:93652-93843 L1PBa WARNING: negative rEnd: -149 SCAFFOLD50071:46224-46441 L1MCc WARNING: negative rEnd: -9 SCAFFOLD120072:35714-35752 AluJ/FLAM WARNING: negative rEnd: -121 SCAFFOLD40075:175596-175802 L1M4b WARNING: negative rEnd: -3 SCAFFOLD105090:247522-247572 FRAM WARNING: negative rEnd: -751 SCAFFOLD55093:83484-83644 L1MD2 WARNING: negative rEnd: -448 SCAFFOLD96093:5787-6077 L1M2 WARNING: negative rEnd: -259 SCAFFOLD80094:21315-21842 L1MA4A WARNING: negative rEnd: -17 SCAFFOLD45096:62501-62604 L1MCb WARNING: negative rEnd: -465 SCAFFOLD110103:97060-97263 L1PA15-16 WARNING: negative rEnd: -241 SCAFFOLD45103:66136-66966 L1M4b WARNING: negative rEnd: -50 SCAFFOLD45103:67253-67435 L1M4b WARNING: negative rEnd: -1116 SCAFFOLD100106:180437-180562 L1MEb WARNING: negative rEnd: -144 SCAFFOLD60110:170536-170650 L1MCc 
WARNING: negative rEnd: -421 SCAFFOLD25112:16796-16975 L1ME1 WARNING: negative rEnd: -273 SCAFFOLD45112:69878-69957 L1M3de WARNING: negative rEnd: -112 SCAFFOLD45112:70274-70337 L1M3de WARNING: negative rEnd: -161 SCAFFOLD45112:70642-70730 L1M3de WARNING: negative rEnd: -1056 SCAFFOLD90113:3554-3834 L1MEc WARNING: negative rEnd: -277 SCAFFOLD65123:11579-12006 L1ME3A WARNING: negative rEnd: -49 SCAFFOLD25127:16121-16503 L1MDa WARNING: negative rEnd: -27 SCAFFOLD25127:16811-16842 L1MDa WARNING: negative rEnd: -182 SCAFFOLD25140:786-1026 L1MEc WARNING: negative rEnd: -134 SCAFFOLD11150:14559-14793 L1MEd WARNING: negative rEnd: -198 SCAFFOLD5150:89989-90045 L1M4 WARNING: negative rEnd: -131 SCAFFOLD5150:90409-90468 L1M4 WARNING: negative rEnd: -3 SCAFFOLD5150:91247-91362 L1M4 WARNING: negative rEnd: -11 SCAFFOLD40153:105943-106073 MLT2B4 WARNING: negative rEnd: -31 SCAFFOLD85154:75958-75978 FRAM WARNING: negative rEnd: -728 SCAFFOLD56155:5878-6313 L1MEb WARNING: negative rEnd: -639 SCAFFOLD56155:6492-6680 L1MEb WARNING: negative rEnd: -26 SCAFFOLD100160:1-587 L1MB8 WARNING: negative rEnd: -202 SCAFFOLD105165:51915-52062 L1M3de WARNING: negative rEnd: -111 SCAFFOLD6165:3223-3363 L1M4 WARNING: negative rEnd: -760 SCAFFOLD40168:10976-11421 L1MEa WARNING: negative rEnd: -698 SCAFFOLD40168:11433-11589 L1MEa WARNING: negative rEnd: -14 SCAFFOLD75169:54846-54883 FRAM WARNING: negative rEnd: -92 SCAFFOLD105171:55570-55642 MLT2B4 WARNING: negative rEnd: -274 SCAFFOLD50189:22507-22737 L1MEb WARNING: negative rEnd: -231 SCAFFOLD40196:85003-85368 L1MCc WARNING: negative rEnd: -136 SCAFFOLD40196:85393-85520 L1MCc WARNING: negative rEnd: -12 SCAFFOLD70199:80341-80456 L1ME3A WARNING: negative rEnd: -677 SCAFFOLD100203:35059-35219 L1MCc WARNING: negative rEnd: -170 SCAFFOLD35203:54829-55008 L1MB3 WARNING: negative rEnd: -1 SCAFFOLD50207:100221-100236 Alu WARNING: negative rEnd: -343 SCAFFOLD126214:34627-34745 L1P4d WARNING: negative rEnd: -1956 SCAFFOLD40225:109594-109802 L1MB8 
WARNING: negative rEnd: -1682 SCAFFOLD40225:109813-110031 L1MB8 WARNING: negative rEnd: -419 SCAFFOLD20230:16903-17089 L1MCc WARNING: negative rEnd: -97 SCAFFOLD75231:22726-23168 L1MCc WARNING: negative rEnd: -302 SCAFFOLD120233:107608-107705 L1M1 WARNING: negative rEnd: -124 SCAFFOLD100239:46223-46431 L1MCc WARNING: negative rEnd: -63 SCAFFOLD41244:21184-21368 L1ME1 WARNING: negative rEnd: -153 SCAFFOLD80248:45221-45315 L1M4b WARNING: negative rEnd: -9 SCAFFOLD110249:100831-100895 L1M3de WARNING: negative rEnd: -57 SCAFFOLD10251:25507-25787 L1MB8 WARNING: negative rEnd: -643 SCAFFOLD77259:860-1045 L1MCc WARNING: negative rEnd: -498 SCAFFOLD76268:14604-14797 L1MEb WARNING: negative rEnd: -258 SCAFFOLD76268:14809-14996 L1MEb WARNING: negative rEnd: -4 SCAFFOLD90269:54122-54199 MLT2B4 WARNING: negative rEnd: -365 SCAFFOLD7273:15-191 L1MCc WARNING: negative rEnd: -228 SCAFFOLD50275:11636-12058 L1PBa WARNING: negative rEnd: -751 SCAFFOLD20279:104338-104383 L1M4b WARNING: negative rEnd: -226 SCAFFOLD30287:7366-7781 L1PBa WARNING: negative rEnd: -1039 SCAFFOLD5293:72196-72400 L1MEb WARNING: negative rEnd: -603 SCAFFOLD10304:114127-114271 L1M5 WARNING: negative rEnd: -160 SCAFFOLD10304:114363-114560 L1M5 WARNING: negative rEnd: -92 SCAFFOLD10304:114575-114740 L1M5 WARNING: negative rEnd: -901 SCAFFOLD10327:91532-91805 L1MEd WARNING: negative rEnd: -564 SCAFFOLD10327:92262-92572 L1MEd WARNING: negative rEnd: -308 SCAFFOLD55348:27043-27419 L1M4b WARNING: negative rEnd: -1930 SCAFFOLD100349:55726-55876 L1M4b WARNING: negative rEnd: -175 SCAFFOLD100349:56278-57031 L1M4b WARNING: negative rEnd: -100 SCAFFOLD100349:63947-64039 L1M4b WARNING: negative rEnd: -266 SCAFFOLD80349:48410-48682 L1MEd WARNING: negative rEnd: -145 SCAFFOLD121350:10964-11015 L1MDa WARNING: negative rEnd: -53 SCAFFOLD75350:47481-47572 L1ME1 WARNING: negative rEnd: -636 SCAFFOLD10375:46541-46860 L1M4b WARNING: negative rEnd: -87 SCAFFOLD40382:8244-8802 L1MEe WARNING: negative rEnd: -28 
SCAFFOLD45384:49442-49558 MLT2B4 WARNING: negative rEnd: -552 SCAFFOLD80392:104844-105116 L1MEd WARNING: negative rEnd: -427 SCAFFOLD80392:109481-109601 L1MEd WARNING: negative rEnd: -939 SCAFFOLD60398:64940-65310 L1M4 WARNING: negative rEnd: -7 SCAFFOLD76399:14698-15345 L1M4c WARNING: negative rEnd: -309 SCAFFOLD100411:46163-46198 L1MEd WARNING: negative rEnd: -152 SCAFFOLD100411:46508-46659 L1MEd WARNING: negative rEnd: -116 SCAFFOLD11422:8740-9284 L1M3c WARNING: negative rEnd: -147 SCAFFOLD80423:74087-74325 L1MEd WARNING: negative rEnd: -606 SCAFFOLD50470:9112-9341 L1P4b WARNING: negative rEnd: -420 SCAFFOLD25477:86802-87608 L1MEc WARNING: negative rEnd: -2 SCAFFOLD25477:87814-88211 L1MEc WARNING: negative rEnd: -24 SCAFFOLD50494:29845-29965 MLT2B4 WARNING: negative rEnd: -1030 SCAFFOLD15495:103193-103375 L1ME3A WARNING: negative rEnd: -392 SCAFFOLD15495:104580-104749 L1ME3A WARNING: negative rEnd: -148 SCAFFOLD15495:105065-105297 L1ME3A WARNING: negative rEnd: -46 SCAFFOLD110523:36392-36574 HAL1 WARNING: negative rEnd: -378 SCAFFOLD110523:37954-38837 HAL1 WARNING: negative rEnd: -184 SCAFFOLD40529:9254-9330 L1M4 WARNING: negative rEnd: -9 SCAFFOLD95544:42116-42338 L1M4 WARNING: negative rEnd: -1997 SCAFFOLD30557:64701-64869 L1M4b WARNING: negative rEnd: -739 SCAFFOLD30557:65181-66324 L1M4b WARNING: negative rEnd: -681 SCAFFOLD30557:66636-66693 L1M4b WARNING: negative rEnd: -6 SCAFFOLD105562:14442-14491 FRAM WARNING: negative rEnd: -9 SCAFFOLD96566:4841-4880 FRAM WARNING: negative rEnd: -250 SCAFFOLD65573:63410-63539 AluJo_short_ WARNING: negative rEnd: -201 SCAFFOLD576:16208-16578 L1M4b WARNING: negative rEnd: -6 SCAFFOLD75578:54511-54560 FRAM WARNING: negative rEnd: -104 SCAFFOLD50626:58371-58412 MLT2B4 WARNING: negative rEnd: -517 SCAFFOLD124648:98-302 L1M3e WARNING: negative rEnd: -278 SCAFFOLD20658:33043-33220 L1P4b WARNING: negative rEnd: -420 SCAFFOLD660:27740-27925 L1MCc WARNING: negative rEnd: -444 SCAFFOLD660:28264-28359 L1MCc WARNING: negative rEnd: 
-72 SCAFFOLD20676:75256-75325 MLT2B4 WARNING: negative rEnd: -165 SCAFFOLD100679:57845-58265 L1PA13 WARNING: negative rEnd: -281 SCAFFOLD10695:66679-66826 L1MDa WARNING: negative rEnd: -147 SCAFFOLD50701:26891-27410 L1M3c WARNING: negative rEnd: -335 SCAFFOLD60709:63984-64229 L1MCc WARNING: negative rEnd: -46 SCAFFOLD80722:29210-29650 L1M4 WARNING: negative rEnd: -1477 SCAFFOLD25737:40798-40921 L1MD2 WARNING: negative rEnd: -629 SCAFFOLD25737:40904-41337 L1MD2 WARNING: negative rEnd: -271 SCAFFOLD25737:47745-47952 L1MD2 WARNING: negative rEnd: -38 SCAFFOLD80773:51525-52241 L1MEd WARNING: negative rEnd: -751 SCAFFOLD80773:52278-52545 L1MEd WARNING: negative rEnd: -112 SCAFFOLD87802:479-871 L1MCc WARNING: negative rEnd: -15 SCAFFOLD71803:28509-28547 MLT2B4 WARNING: negative rEnd: -259 SCAFFOLD60807:87176-87306 L1MCc WARNING: negative rEnd: -24 SCAFFOLD60839:5191-5420 L1M4b WARNING: negative rEnd: -115 SCAFFOLD75849:14545-14683 L1M4 WARNING: negative rEnd: -491 SCAFFOLD5875:11189-11235 L1MEb WARNING: negative rEnd: -321 SCAFFOLD110959:5414-5484 L1MCc # matches select count(*) from rmsk where repEnd < 0; /cluster/bin/i386/maskOutFa -softAdd rheMac1.softmask.fa bed/simpleRepeat/trf.bed rheMac1.softmask2.fa # hard masking (Ns instead of lower case) for download files # split for use by genscan /cluster/bin/i386/maskOutFa rheMac1.softmask2.fa hard rheMac1.hardmask.fa mkdir hardmask-split /cluster/bin/i386/faSplit about rheMac1.hardmask.fa 2000000 hardmask-split # DOWNLOAD FILES ssh hgwdev cd /cluster/data/rheMac1 cp rheMac1.softmask.fa /usr/local/apache/htdocs/goldenPath/rheMac1/bigZips cp rheMac1.softmask2.fa /usr/local/apache/htdocs/goldenPath/rheMac1/bigZips cp rheMac1.hardmask.fa /usr/local/apache/htdocs/goldenPath/rheMac1/bigZips cd /usr/local/apache/htdocs/goldenPath/rheMac1/bigZips nice gzip rheMac1.softmask.fa nice gzip rheMac1.softmask2.fa nice gzip rheMac1.hardmask.fa # REGENERATE 2BIT NIB ssh kksilo cd /cluster/data/rheMac1 /cluster/bin/i386/faToTwoBit 
rheMac1.softmask2.fa rheMac1.softmask.2bit /cluster/bin/i386/twoBitToFa rheMac1.softmask.2bit check.softmask.fa diff -q rheMac1.softmask2.fa check.softmask.fa mv rheMac1.2bit rheMac1.2bit.unmasked mv rheMac1.softmask.2bit rheMac1.2bit # note: size match # GENERATE split masked fas for blastz ssh kkr1u00 mkdir -p /iscratch/i/rheMac1/nib cd /cluster/data/rheMac1 cp /cluster/data/rheMac1/rheMac1.2bit /iscratch/i/rheMac1/nib/ #mkdir -p /iscratch/i/rheMac1/splitFas #/cluster/bin/i386/faSplit sequence /cluster/data/rheMac1/rheMac1.softmask2.fa 350 /iscratch/i/rheMac1/splitDir/ cd /iscratch/i/rheMac1 # check that I got all scaffolds #grep SCAFFOLD *.fa > SCAFFOLD.list # sync /cluster/bin/scripts/iSync ssh kksilo cd /cluster/data/hg17/bed/blastz.rheMac1 # alignments (DONE - March 16 - Robert) # edit doBlastzChainNet.pl to add lines for hg17 glitch # in function getDbFromPath # if ($db eq "hg") { # $db = "hg17"; # } doBlastzChainNet.pl DEF -blastzOutRoot /panasas/store/rheMacOut/ >& do.log & # make blat ooc (DONE - March 17 - Robert) ssh kkr1u00 mkdir /cluster/data/rheMac1/bed/ooc cd /cluster/data/rheMac1/bed/ooc ls -1 /cluster/data/rheMac1/rheMac1.2bit > nib.lst /cluster/bin/i386/blat nib.lst /dev/null /dev/null -tileSize=11 \ -makeOoc=11.ooc -repMatch=800 Writing 11.ooc Wrote 38031 overused 11-mers to 11.ooc Done making 11.ooc cp -p 11.ooc /iscratch/i/rheMac1/ iSync # SWAP CHAINS FROM HG17, BUILD NETS ETC. (DONE 3/31/2005 Robert) ssh eieio mkdir /cluster/data/rheMac1/bed/blastz.hg17.swap cd /cluster/data/rheMac1/bed/blastz.hg17.swap # use special version of doBlastzChainNet.pl to handle hg17 hack # must run on kolossus , due to memory requirements. nohup /cluster/data/hg17/bed/blastz.rheMac1/doBlastzChainNet.pl -swap /cluster/data/hg17/bed/blastz.rheMac1/DEF \ -workhorse=kolossus >& do.log & tail -f do.log # Add {chain,net}hg17 to trackDb.ra if necessary. 
# Add /usr/local/apache/htdocs/goldenPath/rheMac1/vsHg17/README.txt # hgLoadChain ran out of memory, so split the chains cd /cluster/data/rheMac1/bed/blastz.hg17.swap/axtChain mkdir split chainSplit split rheMac1.hg17.all.chain.gz -q & # drop chains with score < 5000 mkdir splitFilter for i in `ls split` ; do echo $i ;chainFilter split/$i -minScore=5000 > splitFilter/$i ; done rm -rf split # assumes empty chain tables exist for i in `ls splitFilter` ; do echo $i ; hgLoadChain -oldTable -tIndex rheMac1 chainHg17 splitFilter/$i ; done # Add gap/repeat stats to the net file using database tables: cd /cluster/data/rheMac1/bed/blastz.hg17.swap/axtChain netClass -verbose=0 -noAr noClass.net rheMac1 hg17 rheMac1.hg17.net # Load nets: netFilter -minGap=10 rheMac1.hg17.net \ | hgLoadNet -verbose=0 rheMac1 netHg17 stdin # make liftOver chains ssh eieio gunzip /cluster/data/hg17/bed/liftOver/hg17.rheMac1.over.chain.gz ssh hgwdev ln /cluster/data/hg17/bed/liftOver/hg17.rheMac1.over.chain /gbdb/hg17/liftOver/hg17TorheMac1.over.chain -s cd /usr/local/apache/htdocs/goldenPath/bosTau1/liftOver cp /cluster/data/bosTau1/bed/bedOver/bosTau1.hg17.over.chain.gz . # NOTE(review): the two bosTau1 paths just above look like leftovers copied from the bosTau1 build doc; the rheMac1 equivalents were presumably intended -- confirm before rerunning. 
# hgsql hgcentraltest # insert into liftOverChain # (fromDb, toDb, path, minMatch, minSizeT, minSizeQ, multiple, minBlocks, fudgeThick) values ("rheMac1", "hg17", "/gbdb/rheMac1/liftOver/rheMac1.hg17.over.chain", 0.95, 0, 0, "Y", 1, "N"); # insert into liftOverChain # (fromDb, toDb, path, minMatch, minSizeT, minSizeQ, multiple, minBlocks, fudgeThick) values ("hg17", "rheMac1", "/gbdb/hg17/liftOver/hg17TorheMac1.over.chain", 0.95, 0, 0, "Y", 1, "N"); # GC5BASE (DONE - 2005-04-01 - Galt) ssh hgwdev mkdir -p /cluster/data/rheMac1/bed/gc5Base cd /cluster/data/rheMac1/bed/gc5Base hgGcPercent -wigOut -doGaps -file=stdout -win=5 rheMac1 \ /cluster/data/rheMac1 | wigEncode stdin gc5Base.wig gc5Base.wib # Calculating gcPercent with window size 5 # Using twoBit: /cluster/data/rheMac1/rheMac1.2bit # File stdout created # Converted stdin, upper limit 100.00, lower limit 0.00 # runs for about 1 hour mkdir /gbdb/rheMac1/wib ln -s `pwd`/gc5Base.wib /gbdb/rheMac1/wib hgLoadWiggle -pathPrefix=/gbdb/rheMac1/wib rheMac1 gc5Base gc5Base.wig # analyze table to update the Cardinality column, so it's not NULL: # (Hiram is going to add this to hgLoadWiggle so it is done automatically) hgsql rheMac1 -e "analyze table gc5Base;" # verify index is correct: # should see good numbers in Cardinality column, not NULL: hgsql rheMac1 -e "show index from gc5Base;" # load qual scores (DONE - 2005-06-09 - Robert) ssh kkstore01-10 cd /cluster/data/rheMac1/qual wget ftp://ftp.hgsc.bcm.tmc.edu/pub/data/Rmacaque/fasta/linearScaffolds/Rmac20041001-freeze-assembly.fa.qual.gz gunzip *.gz qaToQac Rmac20041001-freeze-assembly.fa.qual Rmac20041001-freeze-assembly.fa.qac qacToWig -fixed Rmac20041001-freeze-assembly.fa.qac rheMac1.wig wigEncode rheMac1.wig rheMac1.qual.wig rheMac1.qual.wib ln /cluster/data/rheMac1/qual/rheMac1.qual.wib /gbdb/rheMac1/wib/rheMac1.qual.wib -s ssh hgwdev cd /cluster/data/rheMac1/qual hgLoadWiggle rheMac1 quality rheMac1.qual.wig