# for emacs: -*- mode: sh; -*-

# This is actually supposed to be proCap1.
# You will find the correct building of rock hyrax in the proCap1.txt doc.

# NOTE(review): this reads as a step-by-step log of commands that were run by
# hand (each `ssh` starts an interactive session on the named host), with the
# observed output recorded in comments.  It does not appear to be intended to
# be executed top to bottom as a script -- confirm before automating.

# rock hyrax ( Procavia capensis )
#########################################################################
# DOWNLOAD SEQUENCE (DONE braney 2008-07-11)
    ssh kkstore05
    mkdir /cluster/store12/proCav1
    ln -s /cluster/store12/proCav1 /cluster/data
    mkdir /cluster/data/proCav1/broad
    cd /cluster/data/proCav1/broad

    # Fetch the AGP, the bases and the quality scores from Broad.
    wget --timestamping \
        ftp://ftp.broad.mit.edu/pub/assemblies/mammals/rockHyrax/assembly.agp \
        ftp://ftp.broad.mit.edu/pub/assemblies/mammals/rockHyrax/assembly.bases.gz \
        ftp://ftp.broad.mit.edu/pub/assemblies/mammals/rockHyrax/assembly.quals.gz

    # Produce proCav1.qual.qac from the quality scores and the AGP.
    qaToQac assembly.quals.gz stdout \
        | qacAgpLift assembly.agp stdin proCav1.qual.qac

    wget --timestamping \
        ftp://ftp.broad.mit.edu/pub/assemblies/mammals/rockHyrax/BasicStats.out

    # Contents of BasicStats.out:
    # --------------------------------------------------------------------------------
    # Fri Jan 18 10:02:36 2008 run (pid=6704), using Tue Jan 15 10:51:03 EST 2008 make
    # BasicStats PRE=/wga/dev/WGAdata DATA=projects/LowCoverage/Procavia \
    #            RUN=run/work SUBDIR=assisted_4.12 QUAL_STATS=True \
    #            OUTFILE=BasicStats.out
    # --------------------------------------------------------------------------------
    #
    # Supercontigs having < 3 reads or < 1kb sequence are ignored.
    # 1759 gaps <= -1000; 0 gaps <= -10000; 0 gaps <= -100000
    # fraction of gaps < -10kb or more than 4 deviations below zero: 0.341%
    # 0 gaps > 10kb, 0 gaps > 50kb, 0 gaps > 200kb, 0 gaps > 1Mb
    # 79.79% of reads were used in the assembly (80.62% of bases, 79.78% of Q20 bases)
    # 0% of reads were used multiply in the assembly
    # 794933 contigs, having N50 length 3487
    # total contig length: 2341371985, spanning 2908327961 bases (with 19.5% in gaps)
    # 242541 supercontigs, having N50 length 21281 (not including gaps)
    # 99.2% of assembly in supers of size < 200000 (2883627646 bases)
    # Assembly base coverage: 2.04X.  Assembly Q20 coverage: 1.87X.
    # 99.99% of bases have q >= 1
    # 97.24% of bases have q >= 20
    # 93.46% of bases have q >= 30
    # 89.6% of bases have q >= 40
    # 84.66% of bases have q >= 50

    # Count the distinct scaffold names in column 1 of the AGP.
    cut -f 1 assembly.agp | uniq -c | wc -l
    # Number of scaffolds: 295008

#########################################################################
# Create .ra file and run makeGenomeDb.pl
    ssh kkstore05
    cd /cluster/data/proCav1
    cat << _EOF_ >proCav1.config.ra
# Config parameters for makeGenomeDb.pl:
db proCav1
clade mammal
genomeCladePriority 35
scientificName Procavia capensis
commonName Rock hyrax
assemblyDate Jul. 2008
assemblyLabel Broad Institute proCav1
orderKey 236.5
#mitoAcc AJ222767
mitoAcc none
fastaFiles /cluster/data/proCav1/broad/assembly.bases.gz
agpFiles /cluster/data/proCav1/broad/assembly.agp
qualFiles /cluster/data/proCav1/broad/proCav1.qual.qac
dbDbSpeciesDir rockHyrax
_EOF_

    # Use 'screen'; make sure you are on kkstore05.
    makeGenomeDb.pl -verbose=2 proCav1.config.ra > makeGenomeDb.out 2>&1 &
    # 'ctl-a ctl-d' returns to previous shell

    # Summary statistics of the sequence sizes (column 2 of chrom.sizes).
    cut -f 2 chrom.sizes | ave stdin
    # Q1 1549.000000
    # median 4249.000000
    # Q3 11283.000000
    # average 10119.277335
    # min 1000.000000
    # max 390036.000000
    # count 295008
    # total 2985267768.000000
    # standard deviation 16728.467480

#########################################################################
# REPEATMASKER (DONE braney 2008-07-29)
    ssh kkstore05
    screen # use a screen to manage this job
    mkdir /cluster/data/proCav1/bed/repeatMasker
    cd /cluster/data/proCav1/bed/repeatMasker
    doRepeatMasker.pl -buildDir=/cluster/data/proCav1/bed/repeatMasker \
        proCav1 > do.log 2>&1 &

    # Note: can run simpleRepeats simultaneously

    #### When done with RM:
    ssh pk
    para time
    # Completed: 6258 of 6258 jobs
    # CPU time in finished jobs:   20645642s  344094.03m  5734.90h  238.95d  0.655 y
    # IO & Wait Time:                120642s    2010.71m    33.51h    1.40d  0.004 y
    # Average job time:                3318s      55.31m     0.92h    0.04d
    # Longest finished job:            9661s     161.02m     2.68h    0.11d
    # Submission to last job:         66905s    1115.08m    18.58h    0.77d

    time nice -n +19 featureBits proCav1 rmsk > fb.proCav1.rmsk.txt 2>&1 &
    # 1006823376 bases of 2298455021 (43.804%) in intersection

    # RepeatMasker and lib version from do.log:
    #    Jun 13 2008 (open-3-2-5) version of RepeatMasker
    #    CC   RELEASE 20080611;

#########################################################################
# SIMPLE REPEATS TRF (DONE braney 2008-07-29)
    ssh kkstore05
    screen # use a screen to manage this job
    mkdir /cluster/data/proCav1/bed/simpleRepeat
    cd /cluster/data/proCav1/bed/simpleRepeat
    #
    # NOTE(review): the flattened source showed a '#' directly in front of this
    # command; it is taken to be a bare separator line, because the trailing
    # "proCav1 > do.log 2>&1 &" continuation and the recorded `para time`
    # results below only make sense if the command was actually run.
    doSimpleRepeat.pl -buildDir=/cluster/data/proCav1/bed/simpleRepeat \
        proCav1 > do.log 2>&1 &

    #### When done
    ssh pk
    para time
    # Completed: 51 of 51 jobs
    # CPU time in finished jobs:      24985s     416.41m     6.94h    0.29d  0.001 y
    # IO & Wait Time:                   101s       1.69m     0.03h    0.00d  0.000 y
    # Average job time:                 492s       8.20m     0.14h    0.01d
    # Longest finished job:            3887s      64.78m     1.08h    0.04d
    # Submission to last job:          3911s      65.18m     1.09h    0.05d

    featureBits proCav1 simpleRepeat
    # 24277312 bases of 2407857472 (1.008%) in intersection

    # after RM run is done, add this mask:
    cd /cluster/data/proCav1
    twoBitMask proCav1.rmsk.2bit -add bed/simpleRepeat/trfMask.bed proCav1.2bit

    # Sizes after adding the simpleRepeat mask:
    twoBitToFa proCav1.2bit stdout | faSize stdin
    # 2985267768 bases (577410296 N's 2407857472 real 1732706800 upper 675150672
    # lower) in 295008 sequences in 1 files
    # Total size: mean 10119.3 sd 16728.5 min 1000 (scaffold_295007) max 390036
    # (scaffold_1) median 4249
    # N count: mean 1957.3 sd 3298.2
    # U count: mean 5873.4 sd 10786.7
    # L count: mean 2288.6 sd 3537.1
    # %22.62 masked total, %28.04 masked real

    # Sizes with the RepeatMasker mask only, for comparison:
    twoBitToFa proCav1.rmsk.2bit stdout | faSize stdin
    # 2985267768 bases (577410296 N's 2407857472 real 1733130127 upper 674727345
    # lower) in 295008 sequences in 1 files
    # Total size: mean 10119.3 sd 16728.5 min 1000 (scaffold_295007) max 390036
    # (scaffold_1) median 4249
    # N count: mean 1957.3 sd 3298.2
    # U count: mean 5874.9 sd 10789.4
    # L count: mean 2287.1 sd 3534.9
    # %22.60 masked total, %28.02 masked real

    # Link to it from /gbdb
    ln -s /cluster/data/proCav1/proCav1.2bit /gbdb/proCav1/proCav1.2bit

    # Copy the masked 2bit and chrom.sizes to the san scratch area.
    # NOTE(review): in the flattened source it is unclear whether this mkdir was
    # commented out; -p makes it a no-op if the directory already exists, and
    # the cp commands below need the directory to be present.
    mkdir -p /san/sanvol1/scratch/proCav1
    cp /cluster/data/proCav1/proCav1.2bit /san/sanvol1/scratch/proCav1
    cp /cluster/data/proCav1/chrom.sizes /san/sanvol1/scratch/proCav1