# Command log for loading the GENCODE V5 tracks and metadata tables into hg19.
# Each step below downloads, converts, or loads one set of files; the table
# names (wgEncodeGencode*V5) are the ones passed to the loaders.

# braney 2010-12-13 get distribution mentioned in Felix's mail
wget "ftp://ftp.sanger.ac.uk/pub/gencode/release_5/gencode5_GRCh37.tgz"

# braney 2010-12-22 get new distribution with level 1 pseudogenes marked
# properly

# Base directory used both as the working area and as the root of the sql/
# path written into the generated load jobs.
gencodeDir=/hive/groups/encode/dcc/data/gencodeV5

# Stop if the working directory is missing: the redirections and loaders
# below would otherwise run against whatever directory we happen to be in.
cd "$gencodeDir" || exit 1
wget "ftp://ftp.sanger.ac.uk/pub/gencode/release_5/gencode5_GRCh37.tgz"

# NOTE(review): no command recorded here unpacks gencode5_GRCh37.tgz; the
# gencode5 directory entered next is presumably its contents -- confirm.
cd gencode5 || exit 1

# Gene class table: all four .classes files concatenated into one load.
cat gencode.v5.annotation.level_1_2.classes \
    gencode.v5.annotation.level_3.classes \
    gencode.v5.polyAs.classes \
    gencode.v5.2wayconspseudos.GRCh37.classes \
  | hgLoadSqlTab hg19 wgEncodeGencodeClassesV5 \
      ~/kent/src/hg/lib/gencodeGeneClass.sql stdin

# Level 3 annotation -> wgEncodeGencodeAutoV5.
# The sed rewrites the first "ENSTR" on each line to "ENST0".
gtfToGenePred -genePredExt -geneNameAsName2 \
    gencode.v5.annotation.level_3.gtf stdout \
  | sed 's/ENSTR/ENST0/' > gencode.v5.annotation.level_3.gp
# Pass the .gp file directly, as the other hgLoadGenePred calls here do.
hgLoadGenePred -genePredExt hg19 wgEncodeGencodeAutoV5 \
    gencode.v5.annotation.level_3.gp

# 2-way consensus pseudogenes -> wgEncodeGencode2wayConsPseudoV5.
# Column 3 "transcript" is relabelled "exon" before conversion (presumably
# so gtfToGenePred picks these records up -- confirm).
tawk '$3=="transcript"{$3="exon"}{print $0}' \
    gencode.v5.2wayconspseudos.GRCh37.gtf \
  | gtfToGenePred -genePredExt stdin gencode.v5.2wayconspseudos.GRCh37.gp
hgLoadGenePred -genePredExt hg19 wgEncodeGencode2wayConsPseudoV5 \
    gencode.v5.2wayconspseudos.GRCh37.gp

# Level 1+2 annotation -> wgEncodeGencodeManualV5 (same ENSTR rewrite).
gtfToGenePred -genePredExt -geneNameAsName2 \
    gencode.v5.annotation.level_1_2.gtf stdout \
  | sed 's/ENSTR/ENST0/' > gencode.v5.annotation.level_1_2.gp
hgLoadGenePred -genePredExt hg19 wgEncodeGencodeManualV5 \
    gencode.v5.annotation.level_1_2.gp

# PolyA features -> wgEncodeGencodePolyaV5.
# The three polyA feature types in column 3 are relabelled "exon".
tawk '
$3=="polyA_site"{$3="exon"}
$3=="polyA_signal"{$3="exon"}
$3=="pseudo_polyA"{$3="exon"}
{print $0}' gencode.v5.polyAs.gtf \
  | gtfToGenePred -genePredExt stdin gencode.v5.polyAs.gp
hgLoadGenePred -genePredExt hg19 wgEncodeGencodePolyaV5 gencode.v5.polyAs.gp

cd metadata || exit 1

# Lookup table, one row per metadata file:
#   <metadata file name> <.sql schema file> <destination table>
# The delimiter is quoted so the rows are written literally.
cat > map << '_EOF_'
gencode_Exon_supporting_feature gencodeExonSupport.sql wgEncodeGencodeExonSupportV5
gencode_HGNC gencodeGeneSymbol.sql wgEncodeGencodeGeneSymbolV5
gencode_PDB gencodePdb.sql wgEncodeGencodePdbV5
gencode_Pubmed_id gencodePubMed.sql wgEncodeGencodePubmedV5
gencode_RefSeq gencodeRefSeq.sql wgEncodeGencodeRefSeqV5
gencode_Source gencodeSource.sql wgEncodeGencodeSourceV5
gencode_SwissProt gencodeUniProt.sql wgEncodeGencodeUniProtV5
gencode_Transcript_supporting_feature gencodeTranscriptSupport.sql wgEncodeGencodeTranscriptSupportV5
_EOF_

# For every gencode* file in this directory, look up its row in map and write
#   - a load command to loadMetaJobs.txt (fd 3)
#   - a matching "drop table" statement to dropMetaTableCommands.txt (fd 4)
# Files with no row in map are reported on stderr and skipped, so a failed
# lookup can no longer reuse the previous iteration's fields.
for i in gencode*; do
    # Word splitting of the matched row into $1 $2 $3 is intentional.
    # -F matches the file name literally; "set --" guards against a leading "-".
    # If several rows match, the first row's three fields are used.
    # shellcheck disable=SC2046
    set -- $(grep -F -- "$i" map)
    if [ "$#" -lt 3 ]; then
        printf 'skipping %s: no entry in map\n' "$i" >&2
        continue
    fi
    printf '%s\n' "hgLoadSqlTab hg19 $3 $gencodeDir/sql/$2 $1" >&3
    printf '%s\n' "drop table $3;" >&4
done 3> loadMetaJobs.txt 4> dropMetaTableCommands.txt

# Run the generated load commands with tracing. dropMetaTableCommands.txt is
# only generated here, not executed.
sh -x loadMetaJobs.txt