#!/usr/bin/ruby -w

# encodeLoad.rb - load ENCODE data submission generated by the
#                 automated submission pipeline
# Reads load.ra for information about what to do
# Writes log information to STDOUT and error information to STDERR
# Returns 0 if load succeeds.

# DO NOT EDIT the /cluster/bin/scripts copy of this file --
# edit the CVS'ed source at:
# $Header: /projects/compbio/cvsroot/kent/src/hg/encode/encodeLoad/doEncodeLoad.rb,v 1.10 2008/03/05 23:08:34 galt Exp $

$scripts = "/cluster/bin/scripts"
require "#{$scripts}/err.rb"
require "#{$scripts}/verbose.rb"
require "#{$scripts}/ra.rb"
#require "FileUtils"
require "tempfile"

# Global constants

# Global variables
$loadRa = 'out/load.ra'
$unloadRa = 'out/unload.ra'
$submitDir = ""
$submitType = ""
$tempDir = "/data/tmp"
$encodeDb = "hg18"
$encInstance = ""
$encProject = ""
$sqlCreate = "/cluster/bin/sqlCreate"

# Print the command-line synopsis through errAbort (defined in err.rb).
def usage
  errAbort "usage: doEncodeLoad.rb submission_type project_submission_dir\n"
end

# Load genePred file(s) into table tableName of database $encodeDb via ldHgGene.
#   tableName - destination table name
#   fileList  - file name(s), interpolated verbatim into the shell command
# Tool output is captured in out/loadGene.out and passed to errAbort on failure.
def loadGene(tableName, fileList)
  #TODO replace "head -1000 -q" with cat (only the first 1000 lines of each
  #     file are loaded at present)
  cmd = "head -1000 -q #{fileList} | egrep -v '^(track|browser)' | " \
        "ldHgGene -genePredExt #{$encodeDb} #{tableName} stdin > out/loadGene.out 2>&1"
  if system(cmd)
    print "#{fileList} Loaded\n"
    #debug restore: File.delete "genePred.tab"
  else
    STDERR.print "ERROR: File(s) '#{fileList}' failed gene load.\n"
    errAbort File.read("out/loadGene.out")
  end
end

# Encode wiggle file(s) with wigEncode, load the result with hgLoadWiggle, and
# link the generated .wib file into /gbdb/$encodeDb/wib.
#   tableName - destination table name; also the base name of the .wib file
#   fileList  - file name(s), interpolated verbatim into the shell command
# Tool output is captured in out/loadWig.out and passed to errAbort on failure.
def loadWig(tableName, fileList)
  gbdbWibDir = "/gbdb/#{$encodeDb}/wib"
  #TEST by replacing "cat" with "head -1000 -q"
  cmd = "cat #{fileList} | wigEncode stdin stdout #{tableName}.wib | " \
        "hgLoadWiggle -pathPrefix=#{gbdbWibDir} -tmpDir=#{$tempDir} " \
        "#{$encodeDb} #{tableName} stdin >out/loadWig.out 2>&1"
  if system(cmd)
    # The link target must be absolute: a relative target is resolved against
    # the directory holding the link, which would make the link point at itself.
    wibPath = File.expand_path("#{tableName}.wib")
    system("rm -f #{gbdbWibDir}/#{tableName}.wib")
    system("ln -s #{wibPath} #{gbdbWibDir}")
    print "#{fileList} Loaded\n"
  else
    STDERR.print "ERROR: File(s) #{fileList} failed wiggle load.\n"
    errAbort File.read("out/loadWig.out")
  end
end

# Load plain BED file(s) into table tableName of database $encodeDb via hgLoadBed.
#   tableName - destination table name
#   fileList  - file name(s), interpolated verbatim into the shell command
# Tool output is captured in out/loadBed.out and passed to errAbort on failure.
def loadBed(tableName, fileList)
  #TEST by replacing "cat" with "head -1000 -q"
  cmd = "cat #{fileList} | egrep -v '^(track|browser)' | " \
        "hgLoadBed #{$encodeDb} #{tableName} stdin -tmpDir=out >out/loadBed.out 2>&1"
  #STDERR.puts "debug: cmd = [#{cmd}]" #debug
  if system(cmd)
    print "#{fileList} Loaded\n"
    #debug restore: File.delete "out/bed.tab"
  else
    STDERR.print "ERROR: File(s) #{fileList} failed bed load.\n"
    errAbort File.read("out/loadBed.out")
  end
end

# Load "bed 5 +" file(s) using a table definition read from
# $sqlCreate/<sqlTable>.sql, with every occurrence of sqlTable in that SQL
# replaced by tableName.
#   tableName - destination table name
#   fileList  - file name(s), interpolated verbatim into the shell command
#   sqlTable  - base name of the .sql file and the name to substitute in it
# Aborts via errAbort if the .sql file is missing, if the substitution matches
# nothing, or if hgLoadBed fails (its output is captured in out/loadBed.out).
def loadBed5Plus(tableName, fileList, sqlTable)
  sqlPath = "#{$sqlCreate}/#{sqlTable}.sql"
  unless File.exist? sqlPath
    errAbort "#{sqlPath} not found "
  end
  sql = File.read sqlPath
  # gsub! returns nil when nothing was replaced
  unless sql.gsub! sqlTable, tableName
    errAbort "sql names do not match for substitution: #{sqlTable} #{tableName}"
  end
  #STDERR.puts "sql=[#{sql}]" #debug
  temp_file = Tempfile.new('sql')
  begin
    temp_file.print sql
    temp_file.flush   # hgLoadBed reads the file by path, so it must be on disk
    #TEST by replacing "cat" with "head -1000 -q"
    cmd = "cat #{fileList} | egrep -v '^(track|browser)' | " \
          "hgLoadBed #{$encodeDb} #{tableName} stdin -tmpDir=out " \
          "-sqlTable=#{temp_file.path} >out/loadBed.out 2>&1"
    #STDERR.puts "debug: cmd = [#{cmd}]" #debug
    if system(cmd)
      print "#{fileList} Loaded\n"
      #debug restore: File.delete "out/bed.tab"
    else
      STDERR.print "ERROR: File(s) #{fileList} failed bed load.\n"
      errAbort File.read("out/loadBed.out")
    end
  ensure
    # Remove the temporary SQL file on every exit path.
    temp_file.close!
  end
end

############################################################################
# Main

# Change dir to submission directory obtained from command-line

usage if ARGV.length != 2

$submitType = ARGV[0] # only passed through to doEncodeUnload.rb
$submitDir = ARGV[1]
$encProject = File.basename($submitDir)

# The instance suffix is everything from the last '_' (inclusive) of the
# parent directory path, or "" when that path has no underscore.
$encInstance = File.dirname($submitDir)
und = $encInstance.rindex('_')
$encInstance = und ? $encInstance[und..-1] : ""

Dir.chdir $submitDir

# clean out any stuff from previous load
unless system("doEncodeUnload.rb #{$submitType} #{$submitDir}")
  errAbort "unexpected error running doEncodeUnload.rb cleanup script"
end

unless File.exist? $loadRa
  errAbort "unexpected error: load.ra not found\n"
end

#TODO change to : FileUtils.cp $loadRa, $unloadRa
system "cp #{$loadRa} #{$unloadRa}"

verbose 2, "$encInstance=[#{$encInstance}] und=#{und}\n"
verbose 1, "Loading project in directory #{$submitDir}\n"

# Load files listed in load.ra

#debug: set $opt_verbose = 2 before readRaFile to trace parsing; the
#       previous level is restored afterwards
savev = $opt_verbose
ra = readRaFile $loadRa
$opt_verbose = savev

#debug
#STDERR.puts "debug: ra.length=#{ra.length}\n" #debug
#STDERR.puts "debug: #{ra.inspect}\n" #debug
STDERR.puts "\n" #debug

ra.each do |x|
  h = x[1]
  tablenameExt = "#{h["tablename"]}#{$encInstance}_#{$encProject}"

  verbose 2, "debug: #{x[0]}\n"
  verbose 2, " tablename #{h["tablename"]}\n"
  verbose 2, " type #{h["type"]}\n"
  verbose 2, " tableType #{h["tableType"]}\n"
  verbose 2, " assembly #{h["assembly"]}\n"
  verbose 2, " files #{h["files"]}\n"
  verbose 2, " tablenameExt #{tablenameExt}\n"

  # temporary work-around
  $encodeDb = h["assembly"]

  case h["type"]
  when "genePred"
    loadGene tablenameExt, h["files"]
  when "wig"
    loadWig tablenameExt, h["files"]
  when "bed 5 +"
    loadBed5Plus tablenameExt, h["files"], h["tableType"]
  when "bed 3", "bed 4", "bed 5", "bed 6"
    loadBed tablenameExt, h["files"]
  else
    errAbort "unexpected error: unknown type #{h["type"]} in load.ra\n"
  end
  STDERR.puts "\n" #debug
end

exit 0