#!/bin/tcsh -efx
# Build a relational version of the UniProt database.
# Should be run on hgwdev.
#
# NOTE for the next build: change only DBDATE below and refer to
# $DBDATE / $DB everywhere else, rather than editing the date by hand
# throughout the script.

set DBDATE=121210
set DB=sp$DBDATE

# Working directory for this release, and the FTP directory that holds
# the current UniProt knowledgebase files.
set buildDir = /hive/data/outside/uniProt/$DBDATE/build
set ftpDir = ftp://ftp.expasy.org/databases/uniprot/current_release/knowledgebase/complete

# Set up working directory
mkdir -p $buildDir

# Download uniProt.  This will take about 12 hours.
# Sizes and transfer rates recorded for this build:
#   uniprot_sprot.dat.gz                473,685,422   462K/s   in 13m 48s
#   uniprot_trembl.dat.gz            12,088,824,987   342K/s   in 11h 28m
#   uniprot_sprot_varsplic.fasta.gz       6,932,064   1.01M/s  in 10s
cd $buildDir
foreach f (uniprot_sprot.dat.gz uniprot_trembl.dat.gz uniprot_sprot_varsplic.fasta.gz)
    wget $ftpDir/$f
end

# Turn flat file into relational tab-separated files.
# Both *.dat.gz files are streamed through spToDb; output goes to the
# tabFiles directory next to the build directory.
time zcat *.dat.gz | spToDb stdin ../tabFiles
# real 32m21.930s

# Create the database.
hgsql mysql <