#!/bin/csh -f

# "Make doc" for creating AGP files for ENCODE ortholog regions based on
# coordinates in bed files in the "edited" directory
#
# Each organism section below follows the same steps:
#   1. build the per-region AGP files into an output directory
#   2. write a packing list with encodeRegionPackingList
#   3. copy the packing list and a tarball of the AGP files to the
#      downloads area

# TODO: contigAcc files should go in $outDir (neater this way)

cd /cluster/data/encode/synteny/agp

# create script to run AGP-maker on an assembly
# NOTE: the here-document word is quoted, so csh performs no substitution
# on the lines below, and the terminator line must be the identical
# quoted word on a line by itself.
cat > genomeAgps.csh << 'EOF'
# create AGP's for a genome assembly
#   $1 (db)     - assembly name; selects /cluster/data/<db> and ../edited/<db>.bed
#   $2 (org)    - organism name; used for <org>.contig.tab and the -namePrefix
#   $3 (outDir) - output directory (created if missing), passed as -dir
if ($#argv != 3) then
    echo "usage: $0 <db> <org> <outDir>"
    exit 1
endif
set bedDir = ../edited
set db = $1
set org = $2
set outDir = $3
mkdir -p $outDir
set buildDir = /cluster/data/$db
set bedFile = $bedDir/$db.bed

# use the contig mapping file only if one is present in the current directory
if (-f $org.contig.tab) then
    set contigArg = "-contigFile=$org.contig.tab"
else
    set contigArg = ""
endif

if (-f $buildDir/chrom.lst) then
    # one chr<N>.agp and chr<N>_random.agp per chromosome listed in chrom.lst
    cat $buildDir/chrom.lst | \
        xargs -iX cat $buildDir/X/chr{X,X_random}.agp | \
        regionAgp $contigArg \
            -namePrefix=${org}_ $bedFile stdin -dir $outDir
else
    # no chrom.lst: take every .agp in the 1- and 2-character subdirectories
    cat $buildDir/?{,?}/*.agp | regionAgp $contigArg \
            -namePrefix=${org}_ $bedFile stdin -dir $outDir
endif
'EOF'
# << this line makes emacs coloring happy


# MOUSE

# Create AGP's
csh genomeAgps.csh mm3 mouse mm3

# Create packing list
/cluster/data/encode/bin/scripts/encodeRegionPackingList \
    ../edited/mm3.bed mm3 mouse "Mus musculus" 10090 C57BL/6J FEB-2003 \
    mm3 "NCBI Build 30" > mm3/mm3.packing.list

# Copy to downloads area
cd mm3
set DOWNLOAD_DIR = /usr/local/apache/htdocs/encode/downloads/EncodeAgps
cp mm3.packing.list $DOWNLOAD_DIR
tar cvfz mouse.agp.tar.gz *.agp
mv mouse.agp.tar.gz $DOWNLOAD_DIR
cd ..


# CHICKEN

# Get contig to accession mapping (documented in makeGalGal2.doc)
# This is fetched before the AGP's are created so that genomeAgps.csh
# finds chicken.contig.tab and passes -contigFile, as in the RAT section.
hgsql galGal2 -s -e "select * from contigAcc" > chicken.contig.tab

# Create AGP's
csh genomeAgps.csh galGal2 chicken galGal2.new
mv galGal2.new galGal2

# Create packing list
/cluster/data/encode/bin/scripts/encodeRegionPackingList \
    ../edited/galGal2.bed galGal2 chicken "Gallus gallus" 9031 N/A FEB-2004 \
    galGal2 "CGSC Feb. 2004" > galGal2/galGal2.packing.list

# Copy to downloads area
cd galGal2
set DOWNLOAD_DIR = /usr/local/apache/htdocs/encode/downloads/EncodeAgps
cp galGal2.packing.list $DOWNLOAD_DIR
tar cvfz chicken.agp.tar.gz *.agp
mv chicken.agp.tar.gz $DOWNLOAD_DIR
cd ..


# RAT

# Get contig to accession map
cd /cluster/data/encode/synteny/agp
hgsql rn3 -s -e "select * from contigAcc" > rat.contig.tab

# Create AGP's
csh genomeAgps.csh rn3 rat rn3.new
mv rn3.new rn3

# Create packing list
/cluster/data/encode/bin/scripts/encodeRegionPackingList \
    ../edited/rn3.bed rn3 rat "Rattus norvegicus" 10116 BN/SsNHsdMCW JUN-2003 \
    rn3 "Baylor HGSC v3.1" > rn3/rn3.packing.list

# Copy to downloads area
cd rn3
set DOWNLOAD_DIR = /usr/local/apache/htdocs/encode/downloads/EncodeAgps
cp rn3.packing.list $DOWNLOAD_DIR
tar cvfz rat.agp.tar.gz *.agp
mv rat.agp.tar.gz $DOWNLOAD_DIR

# Check: number of files in the tarball should match the number of
# Region lines in the packing list
cd $DOWNLOAD_DIR
tar tvfz rat* | wc -l
# 51
grep Region rn3.packing.list | wc -l
# 51
# return to the work directory ("cd .." from $DOWNLOAD_DIR would not)
cd /cluster/data/encode/synteny/agp


# CHIMP

# Get contig to accession map
cd /cluster/data/encode/synteny/agp
hgsql panTro1 -s -e "select * from contigAcc" > chimp.contig.tab

# Create AGP's
# NOTE: next time, put chimpChromContigs.agp into $outDir
cat /cluster/data/panTro1/?{,?}/*.agp | \
    chimpChromContigAgp stdin /cluster/data/panTro1/assembly.agp \
        chimpChromContigs.agp
mkdir -p panTro1.new
regionAgp -contigFile=chimp.contig.tab \
    -namePrefix=chimp_ ../edited/panTro1.bed chimpChromContigs.agp \
    -dir panTro1.new
mv panTro1.new panTro1

# Create packing list
/cluster/data/encode/bin/scripts/encodeRegionPackingList \
    ../edited/panTro1.bed panTro1 chimp "Pan troglodytes" 9598 N/A NOV-2003 \
    panTro1 "NCBI Build 1 v1" > panTro1/panTro1.packing.list

# Copy to downloads area
cd panTro1
set DOWNLOAD_DIR = /usr/local/apache/htdocs/encode/downloads/EncodeAgps
cp panTro1.packing.list $DOWNLOAD_DIR
tar cvfz chimp.agp.tar.gz *.agp
mv chimp.agp.tar.gz $DOWNLOAD_DIR

# Check: number of files in the tarball should match the number of
# Region lines in the packing list
cd $DOWNLOAD_DIR
tar tvfz chimp* | wc -l
# 55
grep Region panTro1.packing.list | wc -l
# 55
# return to the work directory ("cd .." from $DOWNLOAD_DIR would not)
cd /cluster/data/encode/synteny/agp

# TESTS ---------------------------------------------------------

# Everything below uses paths relative to the AGP work directory
# (tests/*.bed inputs, testMouse/testChicken outputs, chicken.contig.tab),
# so go there explicitly before removing or creating anything.
cd /cluster/data/encode/synteny/agp

# Mouse test: AGP's from the chromosome 6 and 11 directories of mm3
rm -fr testMouse
mkdir -p testMouse
cat /cluster/data/mm3/{6,11}/*.agp | \
    regionAgp tests/test.bed stdin -namePrefix=mouse_ -dir testMouse
/cluster/data/encode/bin/scripts/encodeRegionPackingList \
    tests/test.bed testMouse mouse "Mus musculus" 10090 C57BL/6J FEB-2003 \
    mm3 "NCBI Build 30"

# Chicken test: AGP's from the chromosome 1 and 13 directories of galGal2,
# with the contig to accession mapping
rm -fr testChicken
mkdir -p testChicken
hgsql galGal2 -s -e "select * from contigAcc" > chicken.contig.tab
cat /cluster/data/galGal2/{1,13}/*.agp | \
    regionAgp tests/test.chicken.bed stdin -namePrefix=chicken_ \
        -dir testChicken -contigFile=chicken.contig.tab
/cluster/data/encode/bin/scripts/encodeRegionPackingList \
    tests/test.chicken.bed testChicken chicken "Gallus gallus" 9031 N/A \
    FEB-2004 galGal2 "CGSC Feb. 2004"