/* pbCalDistGlobal - Create tab delimited data files to be used by Proteome Browser stamps. NOTE(review): this copy of the file appears damaged: line breaks are collapsed and text lying between less-than and greater-than characters seems to have been stripped (several loop headers in calDist, the end of calDist and the opening of main are absent). Restore from the original before building. */ #include "common.h" #include "hash.h" #include "hCommon.h" #include "hdb.h" #include "spDb.h" #include "linefile.h" #define MAX_PROTEIN_CNT 10000000 void usage() /* Explain usage and exit: hands errAbort the usage text, which names the two arguments (spDb and protsDb) and gives an example invocation. */ { errAbort( "pbCalDistGlobal- Create tab delimited data files to be used by Proteome Browser stamps.\n" "usage:\n" " pbCalDistGlobal spDb protsDb\n" " spDb is the name of SWISS-PROT database\n" " protsDb is the name of proteinsXXXXXX database\n" "Example: pbCalDistGlobal sp040915 proteins040915\n"); } int calDist(double *measure, int nInput, int nDist, double xMin, double xDelta, char *oFileName) /* Calculate the histogram distribution of a double array of nInput elements. measure is the input array; bin edge j is xMin + xDelta*j; oFileName is opened for writing with mustOpen. Values at or above xDist[nDist] are tallied in hiCnt, and errAbort is called when the bin, low and high tallies do not add up to nInput. NOTE(review): the setup loop writes indices 0 through nDist+1, yet the assert only guarantees nDist is below 1000, so an nDist of 999 would write one element past both local arrays - confirm and tighten. */ { int distCnt[1000]; double xDist[1000]; FILE *o3; int i,j; int highestCnt, totalCnt; int lowCnt, hiCnt; printf("processing %s\n", oFileName);fflush(stdout); assert(nDist < ArraySize(distCnt)); o3 = mustOpen(oFileName, "w"); for (j=0; j<=(nDist+1); j++) { distCnt[j] = 0; xDist[j] = xMin + xDelta * (double)j; } lowCnt = 0; hiCnt = 0; for (i=0; i= xDist[j]) && (measure[i] < xDist[j+1])) { distCnt[j]++; } } /* count values at or above the upper edge xDist[nDist] */ if (measure[i] >= xDist[nDist]) { hiCnt++; } } highestCnt = 0; totalCnt = 0; for (j=0; j highestCnt) highestCnt = distCnt[j]; totalCnt = totalCnt + distCnt[j]; } printf("\tdisplayedCnt=%d lowCnt=%d hiCnt=%d total=%d\n", totalCnt, lowCnt, hiCnt, totalCnt + hiCnt + lowCnt);fflush(stdout); totalCnt = totalCnt + hiCnt + lowCnt; if (totalCnt != nInput) errAbort("nInput %d is not equal totalCnt %d, aborting ...\n", nInput, totalCnt); for (j=0; j= MAX_PROTEIN_CNT) errAbort("Too many proteins - please set MAX_PROTEIN_CNT to be more than %d\n", MAX_PROTEIN_CNT); if ((icnt % 10000) == 0) { printf("%d done.\n", icnt); } } sqlFreeResult(&sr2); sqlDisconnect(&conn2); sqlDisconnect(&conn3); totalResCnt = 0; for (i=0; i<23; i++) { totalResCnt = totalResCnt + aaResCnt[i]; } /* write out residue count 
distribution: the first 20 aaResCnt entries, each as a fraction of totalResCnt (the sum over 23 entries), followed by one extra row holding 0.0 */ for (i=0; i<20; i++) { aaResCntDouble[i] = ((double)aaResCnt[i])/((double)totalResCnt); fprintf(o2, "%d\t%f\n", i+1, (float)aaResCntDouble[i]); } fprintf(o2, "%d\t%f\n", i+1, 0.0); carefulClose(&o2); /* calculate and write out various distributions: one calDist call per output .tab file, each with its own bin count, start value and bin width */ calDist(molWt, molWtCnt, 21, 0.0, 10000.0,"pepMolWtDist.tab"); calDist(pI, pIcnt, 61, 3.0, 0.2, "pepPiDist.tab"); calDist(avgHydro, icnt, 41, -2.0, 0.1, "pepHydroDist.tab"); calDist(cCountDouble, icnt, 51, 0.0, 1.0, "pepCCntDist.tab"); calDist(interProCountDouble,ipcnt, 16, 0.0, 1.0, "pepIPCntDist.tab"); return(0); }