# for emacs: -*- mode: sh; -*-

# This file describes processing of genomic sequence for the pufferfish
# Tetraodon nigroviridis (Green Spotted Pufferfish), obtained
# from www.genoscope.cns.fr
#
# The commands below are csh/tcsh (foreach/end, "set x =", ">&!", $var:t:r),
# not Bourne shell, despite the emacs mode line above.  They are a log of
# steps run by hand on the named hosts; each "ssh <host>" marks the machine
# the following commands were typed on.

# NOTE(review): the next line is a cut-off sentence that looks like a
# leftover from the Fugu make doc -- confirm and remove if so.
# Fugu Rubripes (Japanese pufferfish), whole genome shotgun assembly dated


# MASK TETRAODON SEQUENCE (IN PROGRESS 2003-09-22 kate)
#       which was downloaded from genoscope, V6, 99MB compressed
#
# NOTE: the RM cluster job was run with input from kksilo fileserver,
#       instead of cluster local disk, iservers or bluearc, and
#       the fileserver handled it fine.
    ssh kksilo
    cd /cluster/data/tetra
    ls tetraodon6.gz
    gunzip tetraodon6.gz
    # split the assembly into pieces of about 500000 so that each
    # RepeatMasker cluster job gets one small input file
    mkdir -p split500K
    faSplit about tetraodon6 500000 split500K/tetra_
    mkdir -p RMRun/out
    cd split500K
    # write one job line per chunk; the {check out line+ file} clause is
    # the per-job output check read by the cluster job system
    # NOTE(review): this reuses the RMFugu wrapper script -- confirm that
    # its RepeatMasker settings are the ones wanted for Tetraodon
    foreach f (tetra_*.fa)
        echo /cluster/bin/scripts/RMFugu \
            /cluster/data/tetra/split500K $f /cluster/data/tetra/RMRun/out \
            '{'check out line+ /cluster/data/tetra/RMRun/out/$f.out'}' \
            >> ../RMRun/RMJobs
    end
    cd ../RMRun
    wc -l RMJobs
        # 632 RMJobs

    ssh kk
    cd /cluster/data/tetra/RMRun
    head RMJobs
    para create RMJobs
    para try
    para check
    # "para try" only submits a small test batch; once those jobs look
    # good, submit the rest and re-check until everything has finished.
    # The masking step below needs an RMRun/out/*.out file for every chunk.
    para push
    para check


# PROCESS TETRA FOR SIMPLE REPEATS
    ssh kksilo
    mkdir -p /cluster/data/tetra/simpleRepeat
    cd /cluster/data/tetra/simpleRepeat
    mkdir -p trf
    # In (t)csh a here-document ends at a line identical to the word after
    # "<<", quotes included, so the terminator below must be exactly 'EOF'
    # starting in column 0.
    cat << 'EOF' > jobs.csh
foreach f (/cluster/data/tetra/split500K/tetra*.fa)
    set fout = $f:t:r.bed
    echo $fout
    /cluster/bin/i386/trfBig -trf=/cluster/bin/i386/trf $f /dev/null -bedAt=trf/$fout -tempDir=/tmp
end
'EOF'
    # runs serially in the background on this host, one chunk at a time
    tcsh jobs.csh >&! jobs.log &
    # check on this with
    tail -f jobs.log
    # when done, the number of bed files should match the number of chunks
    ls -1 trf | wc -l


# FILTER SIMPLE REPEATS INTO MASK
    # make a filtered version of the trf output:
    # keep trf's with period <= 12 (the awk test is on column 5 of the bed):
    ssh kksilo
    cd /cluster/data/tetra/simpleRepeat
    mkdir -p trfMask
    foreach f (trf/*.bed)
        echo "filtering $f"
        awk '{if ($5 <= 12) print;}' $f > trfMask/$f:t
    end


# MASK USING REPEATMASKER AND FILTERED TRF FILES
    ssh kksilo
    cd /cluster/data/tetra
    mkdir -p masked
    # two passes per chunk: first apply the RepeatMasker .out file (-soft),
    # then add the filtered simple repeats on top of that result (-softAdd)
    cat << 'EOF' > mask.csh
foreach p (split500K/tetra*.fa)
    set f = $p:t
    echo "masking $f"
    maskOutFa $p RMRun/out/$f.out masked/$f -soft
    maskOutFa masked/$f simpleRepeat/trfMask/${f:r}.bed masked/$f -softAdd
end
'EOF'
    csh mask.csh >&! mask.log &
    tail -100f mask.log


# COPY TO CLUSTER DATA AREA
    # wait for mask.csh (started in the background above) to finish first
    ssh kkr1u00
    cd /iscratch/i
    # NOTE(review): directory is named tetra.2002 although this work is
    # dated 2003 -- confirm the name is intended
    mkdir -p tetra.2002
    ln -s /iscratch/i/tetra.2002 tetra
    cp /cluster/data/tetra/masked/* /iscratch/i/tetra
    iSync