#!/bin/csh

# Lift the hg16 ENCODE regions over to other assemblies (ortho2 build).
#
# NOTE(review): the bare "ssh <host>" lines look like markers for the machine
# on which the following commands were run, i.e. this reads as a command log
# rather than a script meant to run unattended -- confirm before executing
# the file top to bottom.

# HUMAN (hg16)
# Dump the source regions, ordered by name, to hg16.bed.  This file is the
# input (../hg16.bed) of the per-species liftOver runs below.
ssh hgwdev
cd /cluster/data/encode
mkdir -p ortho2/lifted
cd ortho2/lifted
hgsql hg16 -s -e "SELECT * FROM encodeRegions ORDER BY NAME" > hg16.bed

# CHIMP
ssh hgwdev
cd /cluster/data/encode/ortho2
cd lifted
mkdir -p panTro1
cd panTro1

# Parameter notes for the chimp run:
#   * The chains come from the reciprocal best net.  They are more chopped
#     up, but regions in them don't span unbridged gaps.
#   * minMatch .7 leaves ENr324 and ENm011 unmapped.
#   * The match threshold was lowered until region coverage looked good
#     (to .4) -- except m11, for some reason -- with a minimum of
#     fragmentation.  Results are the same at .3 and .2: once the regions
#     look reasonable, lowering the threshold further doesn't change them.
#   * minSizeQ is reduced to 10000 because the panTro1 assembly is
#     fragmented.
# NOTE(review): the notes above mention .4, but the command below passes
# -minMatch=.01 -- confirm which value was actually used.

# Write the two-step job (liftOver, then liftOverMerge) to panTro1.csh.
# The here-document is closed by the quoted 'EOF' line.
cat > panTro1.csh << 'EOF'
/bin/nice ~kate/bin/i386/liftOver -minMatch=.01 -minSizeT=4000 \
    -minSizeQ=10000 -multiple -chainTable=hg16.rBestChainPanTro1 \
    ../hg16.bed /cluster/data/hg16/bed/liftOver/hg16TopanTro1.chain \
    panTro1.unmerged.bed5 panTro1.unmapped.bed -verbose=2
~kate/bin/i386/liftOverMerge -mergeGap=20000 -verbose=2 \
    panTro1.unmerged.bed5 panTro1.bed5
'EOF'
# Commented-out alternate chain file, kept from the original notes:
#   /cluster/data/hg16/bed/liftOver/hg16ToPanTro1.newId.over.chain
# << for emacs

# Run the job; stdout and stderr both go to panTro1.log (overwritten).
csh panTro1.csh >&! panTro1.log
wc -l panTro1.*bed*
# Recorded counts:
#   71 panTro1.bed5
#   79 panTro1.unmerged.bed5
#   OLD 77 panTro1.bed5
#   OLD 0 panTro1.unmapped.bed

# Reduce the 5-column output to 4 columns: columns 4 and 5 are joined with
# an underscore to form the name field.
awk '{printf "%s\t%s\t%s\t%s_%s\n", $1, $2, $3, $4, $5}' panTro1.bed5 \
    > panTro1.bed

# Expose the result one level up, next to hg16.bed.
cd ..
ln -s panTro1/panTro1.bed .
# CHIMP (continued): load the lifted regions into the panTro1 database as
# table encodeRegions2.
ssh hgwdev
cd /cluster/data/encode/ortho2/lifted
hgLoadBed -noBin panTro1 encodeRegions2 panTro1.bed

# RAT
ssh kksilo
cd /cluster/data/encode/ortho2/lifted
mkdir -p rn3
cd rn3

# Write the two-step job (liftOver, then liftOverMerge) to rn3.csh.
# The here-document is closed by the quoted 'EOF' line.
cat > rn3.csh << 'EOF'
/bin/nice ~kate/bin/i386/liftOver -minMatch=.01 -minSizeT=4000 \
    -minSizeQ=20000 -multiple -chainTable=hg16.chainRn3 \
    ../hg16.bed /cluster/data/hg16/bed/liftOver/hg16Torn3.chain \
    rn3.unmerged.bed5 rn3.unmapped.bed -verbose=2
~kate/bin/i386/liftOverMerge -mergeGap=20000 -verbose=2 \
    rn3.unmerged.bed5 rn3.bed5
'EOF'
# << for emacs

# Run the job; stdout and stderr both go to rn3.log (overwritten).
csh rn3.csh >&! rn3.log
wc -l rn3.*bed*
# Recorded counts:
#   58 rn3.bed5
#   0 rn3.unmapped.bed

# Reduce the 5-column output to 4 columns: columns 4 and 5 are joined with
# an underscore to form the name field.
awk '{printf "%s\t%s\t%s\t%s_%s\n", $1, $2, $3, $4, $5}' \
    rn3.bed5 > rn3.bed
cd ..
ln -s rn3/rn3.bed .

# Load the lifted regions into the rn3 database.
ssh hgwdev
cd /cluster/data/encode/ortho2/lifted
hgLoadBed -noBin rn3 encodeRegions2 rn3.bed

# CHICKEN
ssh kksilo
cd /cluster/data/encode/ortho2/lifted
mkdir -p galGal2
cd galGal2

# Write the two-step job to galGal2.csh.
# FIX: minSizeT is now passed as "-minSizeT=4000", matching every other
# liftOver call in this file.  It was previously written without the leading
# dash, so it was not an option and became an extra positional argument.
cat > galGal2.csh << 'EOF'
# allow smaller chain sizes in query (only 1K, vs. 20K in close species)
/bin/nice ~kate/bin/i386/liftOver -minMatch=.01 -minSizeT=4000 \
    -multiple -minSizeQ=1000 -chainTable=hg16.chainGalGal2 \
    ../hg16.bed /cluster/data/hg16/bed/liftOver/hg16TogalGal2.chain \
    galGal2.unmerged.bed5 galGal2.unmapped.bed -verbose=2
~kate/bin/i386/liftOverMerge -mergeGap=20000 -verbose=2 \
    galGal2.unmerged.bed5 galGal2.bed5
'EOF'
# << for emacs

# Run the job; stdout and stderr both go to galGal2.log (overwritten).
csh galGal2.csh >&! galGal2.log
wc -l galGal2.*bed*
# Recorded counts:
#   75 galGal2.bed5
#   6 galGal2.unmapped.bed
# maps all but r111 and r212, r222

# Reduce the 5-column output to 4 columns (name = column 4 "_" column 5).
awk '{printf "%s\t%s\t%s\t%s_%s\n", $1, $2, $3, $4, $5}' \
    galGal2.bed5 > galGal2.bed
cd ..
ln -s galGal2/galGal2.bed .
# CHICKEN (continued): load the lifted regions into the galGal2 database as
# table encodeRegions2.
ssh hgwdev
cd /cluster/data/encode/ortho2/lifted
hgLoadBed -noBin galGal2 encodeRegions2 galGal2.bed

# MOUSE MM5
ssh kksilo
cd /cluster/data/encode/ortho2/lifted
mkdir -p mm5
cd mm5

# Write the two-step job (liftOver, then liftOverMerge) to mm5.csh.
# Unlike the chimp, rat, chicken and dog runs, no -chainTable is passed here.
# The here-document is closed by the quoted 'EOF' line.
cat > mm5.csh << 'EOF'
/bin/nice ~kate/bin/i386/liftOver -minMatch=.01 -minSizeT=4000 \
    -minSizeQ=20000 -multiple \
    ../hg16.bed /cluster/data/hg16/bed/liftOver/hg16ToMm5.chain \
    mm5.unmerged.bed5 mm5.unmapped.bed -verbose=2
~kate/bin/i386/liftOverMerge -mergeGap=20000 -verbose=2 \
    mm5.unmerged.bed5 mm5.bed5
'EOF'
# << for emacs

# Run the job; stdout and stderr both go to mm5.log (overwritten).
csh mm5.csh >&! mm5.log
wc -l mm5.*bed*

# Reduce the 5-column output to 4 columns: columns 4 and 5 are joined with
# an underscore to form the name field.
awk '{printf "%s\t%s\t%s\t%s_%s\n", $1, $2, $3, $4, $5}' mm5.bed5 \
    > mm5.bed
cd ..
ln -s mm5/mm5.bed .

# load into database
ssh hgwdev
cd /cluster/data/encode/ortho2/lifted
hgLoadBed -noBin mm5 encodeRegions2 mm5.bed

# DOG
ssh kksilo
cd /cluster/data/encode/ortho2/lifted
mkdir -p canFam1
cd canFam1

# Write the two-step job to canFam1.csh.
cat > canFam1.csh << 'EOF'
/bin/nice ~kate/bin/i386/liftOver -minMatch=.01 -minSizeT=4000 \
    -minSizeQ=20000 -multiple -chainTable=hg16.chainCanFam1 \
    ../hg16.bed /cluster/data/hg16/bed/liftOver/hg16ToCanFam1.chain \
    canFam1.unmerged.bed5 canFam1.unmapped.bed -verbose=2
~kate/bin/i386/liftOverMerge -mergeGap=20000 -verbose=2 \
    canFam1.unmerged.bed5 canFam1.bed5
'EOF'
# << for emacs

# Run the job; stdout and stderr both go to canFam1.log (overwritten).
csh canFam1.csh >&! canFam1.log
wc -l canFam1.*bed*
# Recorded counts:
#   51 canFam1.bed5
#   0 canFam1.unmapped.bed

# Reduce the 5-column output to 4 columns (name = column 4 "_" column 5).
awk '{printf "%s\t%s\t%s\t%s_%s\n", $1, $2, $3, $4, $5}' canFam1.bed5 \
    > canFam1.bed
cd ..
ln -s canFam1/canFam1.bed .

# load into database
ssh hgwdev
cd /cluster/data/encode/ortho2/lifted
hgLoadBed -noBin canFam1 encodeRegions2 canFam1.bed

# MOUSE MM3 -- for testing only
# Work in a separate "tests" subdirectory of the current directory.
mkdir -p tests
cd tests
# ?????
# (MOUSE MM3 test run, continued)
# Not sure if .01 used or not
# FIX: the remark above is now a comment.  It previously began the line with
# no "#" and would have been executed as a command named "Not".
~kate/bin/i386/liftOver -minMatch=.01 -minSizeT=4000 \
    -minSizeQ=20000 -multiple \
    /cluster/data/encode/ortho2/lifted/hg16.bed \
    /cluster/data/hg16/bed/liftOver/hg16Tomm3.chain \
    mm3.bed5 mm3.unmapped.bed

# Reduce the 5-column output to 4 columns: columns 4 and 5 are joined with
# an underscore to form the name field.  There is no liftOverMerge step in
# this test run.
awk '{printf "%s\t%s\t%s\t%s_%s\n", $1, $2, $3, $4, $5}' mm3.bed5 \
    > mm3.bed

# For now, load on top of freeze one, for Daryl's comparison page maker
# (note the table is encodeRegions here, not encodeRegions2).
hgLoadBed mm3 encodeRegions mm3.bed -noBin

# FUGU (IN PROGRESS - problem with net ?)
# NOTE(review): this run works in lifted.noDups, but the load step below
# changes to .../ortho2/lifted and reads fr1.bed there, whereas the symlink
# is created in lifted.noDups -- confirm which directory is intended.
ssh kksilo
cd /cluster/data/encode/ortho2/lifted.noDups
mkdir -p fr1
cd fr1

# Write the two-step job (liftOver, then liftOverMerge) to fr1.csh.
# The here-document is closed by the quoted 'EOF' line.
cat > fr1.csh << 'EOF'
/bin/nice ~kate/bin/i386/liftOver -minMatch=.01 -minSizeT=4000 \
    -minSizeQ=1000 -multiple \
    ../hg16.bed /cluster/data/hg16/bed/liftOver/hg16ToFr1.chain \
    fr1.unmerged.bed5 fr1.unmapped.bed -verbose=2
~kate/bin/i386/liftOverMerge -mergeGap=20000 -verbose=2 \
    fr1.unmerged.bed5 fr1.bed5
'EOF'
# << for emacs

# Run the job; stdout and stderr both go to fr1.log (overwritten).
csh fr1.csh >&! fr1.log
wc -l fr1.*bed*
# Recorded counts:
#   51 fr1.bed5
#   0 fr1.unmapped.bed
# NOTE(review): these counts are identical to the ones recorded for dog
# (canFam1); possibly copied along with the section -- verify.

# Reduce the 5-column output to 4 columns (name = column 4 "_" column 5).
awk '{printf "%s\t%s\t%s\t%s_%s\n", $1, $2, $3, $4, $5}' \
    fr1.bed5 > fr1.bed
cd ..
ln -s fr1/fr1.bed .

# load into database
ssh hgwdev
cd /cluster/data/encode/ortho2/lifted
hgLoadBed -noBin fr1 encodeRegions2 fr1.bed

# GET SEQUENCES FOR SPLIT REGIONS (FOR VALIDATION BY ELLIOT MARGULIES)
ssh kksilo
cd /cluster/data/encode/ortho2/lifted

# List regions that have a "_2" piece (split into at least two parts).
# grep prefixes each hit with "<file>.bed:"; the sed expressions drop the
# "_2" suffix and collapse everything between the file's base name and the
# region name, leaving lines of the form "<base>.EN...".
grep _2 *.bed | sed -e 's/_2//' -e 's/.bed:.*EN/.EN/' > split2.txt
wc -l split2.txt
# 50

# Same for regions that have a "_3" piece.
grep _3 *.bed | sed -e 's/_3//' -e 's/.bed:.*EN/.EN/' > split3.txt
wc -l split3.txt
# 18

# Extract sequence for the 3+-way split regions and publish the archive.
# regionSeq is a csh script expected in this directory; it is not shown here.
ssh hgwdev
cd /cluster/data/encode/ortho2/lifted
mkdir fa.3+
csh regionSeq split3.txt encodeRegions2 fa.3+
cd fa.3+
# The result is a gzip-compressed tar archive despite the .fa.gz file name.
tar cvfz /usr/local/apache/htdocs/kate/encode/orthologs.split3.fa.gz *fa