#!/usr/bin/awk -f
#
# acc-list-cmp expectAccs gotAccs
#
# Compare two files containing lists of accessions.  The accession is the
# first field ($1) of each line and may occur multiple times in either
# file.  Blank lines are ignored.  Output is tab-separated with the columns:
#    acc change delta
#
# change is:
#    new - acc is new in gotAccs, delta is the number added
#    del - acc is deleted in gotAccs, delta is the negative of the number
#          deleted
#    chg - acc is in both files with different counts, delta is the +/-
#          change in number (gotAccs count minus expectAccs count)
#
# Accessions with the same count in both files are not output.  Rows are
# written in the order produced by asort() on the accessions.
#
# Requires gawk: uses the ARGIND variable and the asort() function.
#

BEGIN {
    OFS = "\t";
}

# A line with no fields has no accession; skip it so that it is not
# counted as an accession with an empty name.
NF == 0 {
    next;
}

{
    # table counted in depends on file: the first file argument holds the
    # expected accessions, anything after it holds the ones actually gotten
    acc = $1;
    if (ARGIND == 1) {
        expectAcc[acc]++;
    } else {
        gotAcc[acc]++;
    }
    # union of the accessions seen in either file
    allAcc[acc] = acc;
}

END {
    # change to an indexed and sorted array
    numAcc = asort(allAcc);
    for (i = 1; i <= numAcc; i++) {
        acc = allAcc[i];

        # membership tests give a zero count for an accession missing from
        # a file without creating an empty array element for it
        expectCnt = (acc in expectAcc) ? expectAcc[acc] : 0;
        gotCnt = (acc in gotAcc) ? gotAcc[acc] : 0;

        if (gotCnt == 0) {
            stat = "del";
        } else if (expectCnt == 0) {
            stat = "new";
        } else if (gotCnt != expectCnt) {
            stat = "chg";
        } else {
            stat = "same";
        }

        # only differences are reported
        if (stat != "same") {
            print acc, stat, (gotCnt - expectCnt);
        }
    }
}