Looking for %d motifs in %d sequences. Longest sequence is %d bases.
\n", numMotifs, goodSeqListSize, goodSeqElSize); printf("Settings are %s location; %d occurrences per sequence; %s align; ", (useLocation ? "use" : "ignore"), maxOcc, (leftAlign ? "left" : "right") ); printf("training weights %s; initial motif size %d; ", (kentishWeights ? "Kentish" : "classical"), defaultTileSize); printf("restrain expansionist tendencies %f; number of sequences in initial scan %d; ", constrainer, startScanLimit); backgroundName = bgSource; if (backgroundName == NULL) backgroundName = badName; if (backgroundName == NULL) backgroundName = "same as foreground"; printf("background model %s; background data %s;
", (nullModelCgiName == NULL ? "Markov 0" : nullModelCgiName), backgroundName); printf("\n"); progress("This run would take about %2.1f minutes on a lightly loaded UCSC CSE web server.", calcApproximateTime()); htmlHorizontalLine(); doTopTiles(numMotifs, tn.forCgi, logFile); colorProfile(tn.forCgi, goodSeq, goodSeqElSize); printf("\n"); freeSeqList(&goodSeq); endTime = clock1000(); htmlHorizontalLine(); printf("Calculation time was %4.3f minutes\n", 0.001*(endTime-startTime)/60); }

void doRandomTest(char *badName, boolean premade)
/* Generate tables for scores on random sequences.
 *
 * For each sequence length from 100 to 500 in steps of 100, calls generate()
 * with a temp file name, a count of 100 and that length, then runs
 * oneSearchSet() with the log file.  Each length is repeated 'reps' times and
 * a banner line is written to the log before every repetition.
 *
 *   badName - background file name, passed through to oneSearchSet().
 *   premade - passed through to oneSearchSet() unchanged. */
{
int seqLen, seqCount;
struct tempName randTn, profTn;
/* NOTE(review): hard-coded DOS-style path; on a Unix host the backslashes are
 * ordinary file name characters - confirm this is intended. */
FILE *logFile = mustOpen("\\temp\\random.txt", "w");
int reps = 2;
int i;

makeTempName(&randTn, "rand", ".fa");
/* NOTE(review): profTn is named here but not referenced again in this function. */
makeTempName(&profTn, "rand", ".pfl");
printf("\n\n");
for (seqLen = 100; seqLen <= 500; seqLen += 100)
    {
    /* With these bounds the loop body executes once, for seqCount == 100. */
    for (seqCount = 100; seqCount <= 100; seqCount += 10)
        {
        /* goodName is a file-level variable; oneSearchSet() is called without it. */
        goodName = randTn.forCgi;
        for (i=1; i<=reps; ++i)
            {
            fprintf(logFile, "----------------- %d Random sequence of %d nucleotides take %d----------------\n", seqCount, seqLen, i);
            generate(goodName, seqCount, seqLen);
            oneSearchSet(badName, premade, logFile);
            }
        }
    }
fclose(logFile);
}

void doMiddle()
/* Generate middle part of html file.  In this case just read all the cgi
 * variables and then call routine to actually process.
 *
 * Reads the optional CGI settings into the file-level option variables,
 * works out the foreground (goodName) and background (badName) inputs,
 * calls getNullModel(), and then either runs the random test or a single
 * oneSearchSet().  Jobs coming from the web are refused with errAbort() when
 * calcApproximateTime() exceeds 5.0 minutes. */
{
char *nullModelCgi = "background";
char *badName = NULL;
char badNameBuf[512];
boolean premade = FALSE;
boolean isRandomTest = cgiBoolean("randomTest");
boolean isControlRun = cgiBoolean("controlRun");

printf("Improbizer Results\n\n");

/* Optional settings: each one overrides its current value only when the
 * corresponding CGI variable is present. */
leftAlign = cgiBoolean("leftAlign");
if (cgiVarExists(nullModelCgi))
    {
    nullModelCgiName = cgiEncode(cgiString(nullModelCgi));
    nullModel = cgiOneChoice(nullModelCgi, nullModelChoices, ArraySize(nullModelChoices));
    }
if (cgiVarExists("maxOcc"))
    maxOcc = cgiInt("maxOcc");
if (cgiVarExists("tileSize"))
    defaultTileSize = cgiInt("tileSize");
if (cgiVarExists("startScanLimit"))
    startScanLimit = cgiInt("startScanLimit");
if (cgiVarExists("numMotifs"))
    numMotifs = cgiInt("numMotifs");
if (cgiVarExists("constrainer"))
    constrainer = cgiDouble("constrainer");
if (cgiVarExists("trainingWeights"))
    {
    char *tw = cgiString("trainingWeights");
    if (sameWord(tw, "Kentish"))
        kentishWeights = TRUE;
    else if (sameWord(tw, "classical"))
        kentishWeights = FALSE;
    else
        errAbort("Unknown trainingWeights %s", tw);
    }
useLocation = !cgiBoolean("ignoreLocation");

/* Foreground sequences: pasted text is tried first, then the "good"
 * variable.  Skipped in random test mode, where doRandomTest() sets goodName. */
if (!isRandomTest)
    {
    if (cgiVarExists("goodText"))
        {
        pasteToFa("goodText", &goodName, &goodSeqListSize, &goodSeqElSize);
        if (goodSeqListSize <= 0)
            errAbort("You need to paste in something. Go back and try again!");
        }
    if (goodName == NULL)
        goodName = cgiString("good");
    }

/* Background sequences: pasted text first, then the optional "bad" variable. */
if (cgiVarExists("badText"))
    {
    int numSeq, elSize;
    pasteToFa("badText", &badName, &numSeq, &elSize);
    if (numSeq <= 0)
        badName = NULL;
    }
if (badName == NULL)
    badName = cgiOptionalString("bad");

/* If they selected a premade background, figure out file that goes with it,
 * then look up directory to find file in. */
if ((bgSource = cgiOptionalString("backgroundDataSource")) != NULL)
    {
    char *premadeBg = NULL;
    if (sameString(bgSource, "Worm Intron 3'"))
        premadeBg = "wormInt3";
    else if (sameString(bgSource, "Worm Intron 5'"))
        premadeBg = "wormInt5";
    else if (sameString(bgSource, "Yeast Promoter"))
        premadeBg = "yeastPromo";
    else if (sameString(bgSource, "Same as Foreground"))
        ;   /* Recognized choice with no premade file. */
    else if (sameString(bgSource, "From Data Pasted Below"))
        ;   /* Recognized choice with no premade file. */
    else
        errAbort("Unknown backgroundDataSource");
    if (premadeBg != NULL)
        {
        /* badNameBuf is local to this function; every use of badName below
         * happens before this function returns. */
        makePremadeBgPathName(premadeBg, badNameBuf, sizeof(badNameBuf));
        badName = badNameBuf;
        premade = TRUE;
        }
    }

getNullModel(goodName, badName, premade);
if (isControlRun)
    goodName = randomSpoof(goodName);
if (isRandomTest )
    {
    puts("Random test mode - this will take a good long time. Be sure to kill "
         "the process if you get impatient.\n\n");
    doRandomTest(badName, premade);
    }
else
    {
    if (isFromWeb && calcApproximateTime() > 5.0)
        {
        errAbort("Sorry, this job is too big for our web server - it would use about "
                 "%2.1f minutes of CPU time. Out of fairness to the other users of this "
                 "machine we limit jobs to 5.0 minutes of CPU time or less. Please reduce "
                 "the size of your data (now %d sequences of %d bases each), the number of "
                 "motifs you're looking for (now %d), or the number of sequences in the initial "
                 "scan (now %d). The most important influence on run time is the maximum size "
                 "of an individual sequence. If you really need to run the program on a data "
                 "set this large contact Jim Kent (kent@biology.ucsc.edu) to get a batch version "
                 "of this program to run on your own machine.",
                 calcApproximateTime(), goodSeqListSize, goodSeqElSize, numMotifs, startScanLimit );
        }
    /* NOTE(review): the text below reads "at one over each sequence" and
     * "it's own color"; left byte-for-byte pending a decision on the wording. */
    puts("Improbizer will display the results in parts. First it will "
         "display the profiles (consensus sequences with the probability of "
         "each base at each position) individually as they are calculated. The "
         "position of a profile in a sequence is indicated by upper case. The "
         "strength of the profile match is indicated by the score on the left. "
         "There will be a delay during this phase as each profile is calculated. "
         "Second Improbizer will "
         "display all profiles at one over each sequence. Each profile "
         "has it's own color and the stronger the profile matches the darker "
         "it will appear in the sequence. Finally there will be a graphic "
         "summary of all the profiles at the end, using the same color "
         "conventions.\n");
    oneSearchSet(badName, premade, NULL);
    }
}

int main(int argc, char *argv[])
/* Program entry point.  Runs the start-up calls, prints a usage message via
 * errAbort() when not on the web and cgiSpoof() reports failure, then emits
 * the HTTP/HTML preamble and runs doMiddle() through htmEmptyShell(). */
{
//pushCarefulMemHandler();
initProfileMemory();
dnaUtilOpen();
statUtilOpen();
initRandom();

isFromWeb = cgiIsOnWeb();
if (!isFromWeb && !cgiSpoof(&argc, argv))
    {
    errAbort("ameme - find common patterns in DNA\n"
             "usage\n"
             " ameme good=goodIn.fa [bad=badIn.fa] [numMotifs=2] [background=m1] [maxOcc=2]\n"
             "where goodIn.fa is a multi-sequence fa file containing instances\n"
             "of the motif you want to find, badIn.fa is a file containing similar\n"
             "sequences but lacking the motif, numMotifs is the number of motifs\n"
             "to scan for, background is m0,m1, or m2 for various levels of Markov\n"
             "models, and maxOcc is the maximum occurrences of the motif you \n"
             "expect to find in a single sequence\n");
    }

/* Print out html header.  Make background color brilliant white. */
puts("Content-Type:text/html\n");
printf("\n%s \n\n\n", "Improbizer Results");
puts("\n");

/* Wrap error handling etc. around doMiddle. */
htmEmptyShell(doMiddle, NULL);

//carefulCheckHeap();

/* Write end of html. */
htmlEnd();
return 0;
}