#!/bin/tcsh -efx
# Script to create a relational version of the UniProt database.
# Should be run on hgwdev.
#
# Interpreter flags: -e stops at the first failing command, -f skips the
# shell startup files, -x echoes each command before it is executed.

# Identifier of the UniProt snapshot being built; it names the working
# directory below. To build a newer snapshot, change this one value instead
# of editing every path.
set DB = 120323

# Working directory that receives the downloaded files.
set buildDir = /hive/data/outside/swissprot/$DB/build

# Set up working directory
mkdir -p $buildDir

# Download swissprot. This will take about 12 hours
cd $buildDir
wget ftp://ftp.expasy.org/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.dat.gz
# 460,569,170 292K/s in 24m 27s
wget ftp://ftp.expasy.org/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.dat.gz
# 8,377,086,168 172K/s in 11h 2m
wget ftp://ftp.expasy.org/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot_varsplic.fasta.gz
# 6,672,072 173K/s in 32s

# Turn flat file into relational tab-separated files.
# Both .dat.gz downloads are decompressed into a single stream for spToDb;
# the output goes to ../tabFiles, i.e. a sibling of the build directory.
zcat *.dat.gz | spToDb stdin ../tabFiles

# Create the database.
# NOTE(review): the command below is cut off in the reviewed copy of this
# script -- the source of its input redirection (presumably a here-document
# holding the SQL that creates the database) is missing. It is kept verbatim
# but commented out; restore the missing text before running this step.
# hgsql mysql <