#!/usr/bin/awk -f
#
# dbload-times dblog1 [dblog2 ...]
#
# Analyze gbLoadRna log files and generate a report of where time is spent.
# Probably need at least -verbose=1
#
# Output is a tab-separated table on stdout: one row per
# host/db/srcDb/type combination, followed by one "total" row per host.
#
# NOTE: this script uses the three-argument form of match(), which is a
# gawk extension, so the awk interpreter named above must be gawk.
#

# Print an error message to stderr and terminate with exit status 1.
# An exit executed in a main rule still runs the END rule, so a flag is
# set here to let END skip the report instead of printing a partial one.
function die(msg) {
    print "Error: " msg > "/dev/stderr";
    failed = 1;
    exit(1);
}

# Convert a number of seconds to hours, formatted with three decimals.
function getHours(sec) {
    return sprintf("%0.3f", (sec / (60.0 * 60.0)));
}

BEGIN {
    OFS = "\t";
}

# begin line, get host
#   hgw1 2006.07.20-16:30:02 dbload: begin
# The host stays current for all following lines until the next begin line.
/dbload: begin$/ {
    host = $1;
    hosts[host] = 1;
}

# Count of sequences changes
#   gbLoadRna: genbank.136.0 hg16 native,xeno mrna: delete=0 seqChg=0 metaChg=0 extChg=0 new=552880 orphan=0 noChg=0
# NOTE(review): the code reads srcDb from $3, db from $4 and type from $6,
# which is one field further right than the sample line above suggests;
# confirm against real log files.
/^gbLoadRna: .* delete=/ {
    db = $4;

    if ($3 ~ /^genbank/) {
        srcDb = "genbank";
    } else if ($3 ~ /^refseq/) {
        srcDb = "refseq";
    } else {
        die("can't parse srcDb: " $0);
    }

    if ($6 ~ /^mrna/) {
        type = "mrna";
    } else if ($6 ~ /^est/) {
        type = "est";
    } else {
        die("can't parse type: " $0);
    }

    # key is deliberately left set after this rule: the load-time rule
    # below charges its time to the most recently seen key.
    key = host "\t" db "\t" srcDb "\t" type;
    keys[key] = 1;

    if (match($0, "delete=([0-9]*) seqChg=([0-9]*) metaChg=([0-9]*) extChg=([0-9]*) new=([0-9]*) orphan=([0-9]*) noChg=([0-9]*)", parts) == 0) {
        die("can't parse counts from: " $0);
    }

    # per host/db/srcDb/type counts
    deleteCnt[key]  += parts[1];
    seqChgCnt[key]  += parts[2];
    metaChgCnt[key] += parts[3];
    extChgCnt[key]  += parts[4];
    newCnt[key]     += parts[5];
    orphanCnt[key]  += parts[6];
    noChgCnt[key]   += parts[7];

    # per host totals; the array names must match those read in END
    deleteTotalCnt[host]  += parts[1];
    seqChgTotalCnt[host]  += parts[2];
    metaChgTotalCnt[host] += parts[3];
    extChgTotalCnt[host]  += parts[4];
    newTotalCnt[host]     += parts[5];
    orphanTotalCnt[host]  += parts[6];
    noChgTotalCnt[host]   += parts[7];
}

# load time
#   <-load for genbank.154.0 anoGam1 native,xeno mrna: real=143.37 cpu=23.13 sys=10.31 wait=109.93 childCpu=0.04 childSys=0.04 tod=2006-07-20T16:32:38
# Only the real (wall-clock) seconds are accumulated, against the key and
# host that were current when this line was read.
/<-load for .* real=/ {
    if (match($0, "real=([0-9.]*)", parts) == 0) {
        die("can't parse load time: " $0);
    }
    dbLoadTime[key] += parts[1];
    dbLoadTotalTime[host] += parts[1];
}

END {
    # die() was called: do not print a report built from partial data
    if (failed) {
        exit(1);
    }

    # report by db/srcDb/type
    print "host", "db", "srcdb", "type",
          "delete", "seqChg", "metaChg", "extChg", "new", "orphan", "noChg", "hours";
    for (key in keys) {
        print key,
              deleteCnt[key], seqChgCnt[key], metaChgCnt[key], extChgCnt[key],
              newCnt[key], orphanCnt[key], noChgCnt[key], getHours(dbLoadTime[key]);
    }

    # totals by host
    for (host in hosts) {
        print host, "total", "", "",
              deleteTotalCnt[host], seqChgTotalCnt[host], metaChgTotalCnt[host], extChgTotalCnt[host],
              newTotalCnt[host], orphanTotalCnt[host], noChgTotalCnt[host], getHours(dbLoadTotalTime[host]);
    }
}