#include "common.h"
#include "memalloc.h"
#include "errabort.h"
#include "dnautil.h"
#include "fa.h"
#include "dnaseq.h"
#include "oldGff.h"
#include "wormdna.h"
#include "fuzzyFind.h"
#include "cheapcgi.h"
#include "htmshell.h"

void debugListAli(struct ffAli *ali, DNA *needle, DNA *hay)
/* Print out ali list for debugging purposes.  Rewinds to the leftmost
 * block of the list, then prints one paragraph per block giving its
 * start/end offsets relative to needle and hay.  Does nothing when ali
 * is NULL. */
{
int i = 1;

if (ali == NULL)
    return;
while (ali->left != NULL)
    ali = ali->left;
while (ali != NULL)
    {
    /* Pointer differences have type ptrdiff_t; cast them so the arguments
     * really are ints as the %d conversions require. */
    htmlParagraph("%d needle %d %d haystack %d %d", i,
        (int)(ali->nStart - needle), (int)(ali->nEnd - needle),
        (int)(ali->hStart - hay), (int)(ali->hEnd - hay));
    htmlHorizontalLine();
    ali = ali->right;
    ++i;
    }
}

enum sv
/* Which strand(s) of the needle to try against the haystack. */
    {
    svBoth,
    svForward,
    svReverse,
    };

void needleInHay(char *rawNeedle, char *rawHay, enum sv strand,
    enum ffStringency stringency, char *needleName, char *hayName,
    int hayNumOffset)
/* Filter input to get rid of non-DNA cruft.  Then find best match to needle
 * in haystack and display it.
 *   rawNeedle, rawHay   - unfiltered sequence text
 *   strand              - try the needle as given, reverse-complemented,
 *                         or both (the higher ffScoreCdna score wins)
 *   stringency          - passed through to ffFind
 *   needleName, hayName - labels passed to ffShowAli and used in the
 *                         error message
 *   hayNumOffset        - passed through to ffShowAli
 * Calls errAbort if no alignment is found. */
{
long needleSize = dnaFilteredSize(rawNeedle);
long haySize = dnaFilteredSize(rawHay);
DNA *needle = needMem(needleSize+1);
DNA *mixedCaseNeedle = needMem(needleSize+1);
DNA *hay = needMem(haySize+1);
DNA *mixedCaseHay = needMem(haySize+1);
DNA *needleEnd = needle + needleSize;
DNA *hayEnd = hay + haySize;
struct ffAli *bestAli;
struct ffAli *forwardAli = NULL;
struct ffAli *reverseAli = NULL;
/* Sentinel scores for a strand that is not searched at all. */
int forwardScore = -0x7fff;
int reverseScore = -0x7fff;

/* Keep a mixed-case copy of each sequence next to the copy that is
 * actually searched. */
dnaMixedCaseFilter(rawNeedle, mixedCaseNeedle);
dnaMixedCaseFilter(rawHay, mixedCaseHay);
dnaFilter(mixedCaseNeedle, needle);
dnaFilter(mixedCaseHay, hay);

if (strand == svBoth || strand == svForward)
    {
    forwardAli = ffFind(needle, needleEnd, hay, hayEnd, stringency);
    forwardScore = ffScoreCdna(forwardAli);
    }
if (strand == svBoth || strand == svReverse)
    {
    /* The reverse search runs on the needle buffer reverse-complemented
     * in place. */
    reverseComplement(needle, needleSize);
    reverseAli = ffFind(needle, needleEnd, hay, hayEnd, stringency);
    reverseScore = ffScoreCdna(reverseAli);
    }

/* Forward wins ties. */
bestAli = forwardAli;
if (reverseScore > forwardScore)
    bestAli = reverseAli;

if (!bestAli)
    {
    /* The code below dereferences bestAli, so errAbort is relied upon
     * not to return. */
    errAbort("Couldn't find an alignment between %s and %s",
        needleName, hayName);
    }

/* Overwrite the searched buffers (which the alignment blocks point into)
 * with the mixed-case text before displaying. */
memcpy(needle, mixedCaseNeedle, needleSize);
memcpy(hay, mixedCaseHay, haySize);
/* debugListAli(bestAli,needle,hay); */
puts("\n\nAlignment Views\n\n");
/* Last argument tells ffShowAli whether the reverse-strand alignment won. */
ffShowAli(bestAli, needleName, needle, 0, hayName, hay, hayNumOffset,
    bestAli == reverseAli);

ffFreeAli(&forwardAli);
ffFreeAli(&reverseAli);
/* Release the sequence buffers only after the alignments that point into
 * them are gone. */
freeMem(mixedCaseHay);
freeMem(hay);
freeMem(mixedCaseNeedle);
freeMem(needle);
}

/* Values accepted for the "stringency" CGI variable. */
struct cgiChoice stringencyChoices[] =
    {
    {"exactly", ffExact},
    {"cDNA", ffCdna},
    {"tightly", ffTight},
    {"loosely", ffLoose},
    };

/* Values accepted for the "strand" CGI variable. */
struct cgiChoice strandChoices[] =
    {
    {"both", svBoth,},
    {"forward", svForward,},
    {"reverse", svReverse,},
    };

void directDoMiddle()
/* Grab the form variables from CGI and call routine that does real
 * work on them. */
{
char *rawNeedle = cgiString("needle");
char *rawHay = cgiString("hayStack");
enum ffStringency stringency = cgiOneChoice("stringency",
    stringencyChoices, ArraySize(stringencyChoices));
enum sv strand = cgiOneChoice("strand",
    strandChoices, ArraySize(strandChoices));

needleInHay(rawNeedle, rawHay, strand, stringency, "needle", "haystack", 0);
}

void upcExons(struct gffGene *gene)
/* Upper-case the part of gene->dna covered by each exon.  The length
 * end-start+1 means exon->end is counted as part of the exon. */
{
GffExon *exon;

for (exon = gene->exons; exon != NULL; exon = exon->next)
    {
    toUpperN(gene->dna + exon->start, exon->end - exon->start+1);
    }
}

void showInfo(struct wormCdnaInfo *info, struct dnaSeq *cdna)
/* Display some info, with hyperlinks and stuff.  Prints nothing at all
 * when info->description is NULL.
 * NOTE(review): several format strings below are empty or consist only of
 * newlines while still being handed arguments; it looks like markup was
 * lost from them at some point - confirm against the original source. */
{
if (info->description != NULL)
    {
    char *gene = info->gene;
    char *cdnaName = cdna->name;

    puts("\n\ncDNA Information and Links\n\n");
    printf("\n\n%s\n\n\n", info->description);
    if (gene)
        {
        printf("Literature on gene: ");
        printf("", gene);
        printf("%s\n\n", gene);
        }
    if (info->product)
        {
        char *encoded = cgiEncode(info->product);
        printf("Literature on product: ");
        printf("", encoded);
        printf("%s\n\n", info->product);
        freeMem(encoded);
        }
    if (info->knowStart)
        {
        char protBuf[41];
        /* cdsStart is used with a -1 adjustment to index into the DNA. */
        dnaTranslateSome(cdna->dna + info->cdsStart-1, protBuf,
            sizeof(protBuf));
        printf("Translation: ");
        printf("", cdnaName, info->cdsStart);
        printf("%s", protBuf);
        /* A completely full buffer gets an ellipsis appended. */
        if (strlen(protBuf) == sizeof(protBuf)-1)
            printf("...");
        printf("\n\n");
        }
    printf("GenBank accession: %s\n\n", cdnaName);
    htmlHorizontalLine();
    }
}

void lookupDoMiddle()
/* Look up both sequences from CGI variables and align them.
 * The needle comes from the "cDNA" variable (looked up with wormCdnaSeq)
 * or, failing that, from the file named by "needleFile".  The haystack is
 * chosen by "gene", which may be a chromosome range, a nameless cluster,
 * or a gene name.  "strand", "stringency" and "hayStrand" are optional and
 * default to both, cDNA and '+'. */
{
char *cdnaName;
char *geneName = cgiString("gene");
struct dnaSeq *cdna = NULL;
enum sv strand = svBoth;
enum ffStringency stringency = ffCdna;
char hayStrand = '+';
DNA *dna = NULL;

if (cgiVarExists("strand"))
    {
    strand = cgiOneChoice("strand", strandChoices,
        ArraySize(strandChoices));
    }
if (cgiVarExists("stringency"))
    {
    stringency = cgiOneChoice("stringency", stringencyChoices,
        ArraySize(stringencyChoices));
    }
if (cgiVarExists("hayStrand"))
    hayStrand = cgiString("hayStrand")[0];

/* Fetch the needle. */
if ((cdnaName = cgiOptionalString("cDNA")) != NULL)
    {
    struct wormCdnaInfo info;
    if (!wormCdnaSeq(cdnaName, &cdna, &info))
        errAbort("Couldn't find cDNA %s\n", cdnaName);
    showInfo(&info, cdna);
    wormFreeCdnaInfo(&info);
    }
else if ((cdnaName = cgiOptionalString("needleFile")) != NULL)
    {
    cdna = faReadDna(cdnaName);
    cdnaName = "pasted";
    }
else
    {
    errAbort("Can't find cDNA or needleFile in cgi variables.");
    }

/* Fetch the haystack and run the alignment. */
if (wormIsChromRange(geneName))
    {
    /* Parse a private copy of the name; geneName itself is still needed
     * as the display label. */
    char *dupeName = cloneString(geneName);
    char *chrom;
    int start, end;

    wormParseChromRange(dupeName, &chrom, &start, &end);
    dna = wormChromPartExonsUpper(chrom, start, end-start);
    if (hayStrand == '-')
        reverseComplement(dna, end-start);
    needleInHay(cdna->dna, dna, strand, stringency, cdnaName, geneName,
        start);
    freeMem(dupeName);
    freeMem(dna);
    }
else if (wormIsNamelessCluster(geneName))
    {
    dna = wormGetNamelessClusterDna(geneName);
    needleInHay(cdna->dna, dna, strand, stringency, cdnaName, geneName, 0);
    freeMem(dna);
    }
else if (getWormGeneDna(geneName, &dna, TRUE))
    {
    /* NOTE(review): unlike the other branches, dna is not freed here;
     * confirm who owns the buffer getWormGeneDna hands back. */
    needleInHay(cdna->dna, dna, strand, stringency, cdnaName, geneName, 0);
    }
else
    {
    char *chrom;
    int start, end;
    /* In this branch the strand comes from the gene lookup, not from the
     * "hayStrand" CGI variable. */
    char geneStrand;

    if (!wormGeneRange(geneName, &chrom, &geneStrand, &start, &end))
        errAbort("Can't find %s", geneName);
    dna = wormChromPartExonsUpper(chrom, start, end-start);
    if (geneStrand == '-')
        reverseComplement(dna, end-start);
    needleInHay(cdna->dna, dna, strand, stringency, cdnaName, geneName,
        start);
    freeMem(dna);
    }
}

void doMiddle()
/* Decide whether to do middle on data posted, or to
 * look it up from cDNA/gene names. */
{
fprintf(stdout, "\n");
if (cgiVarExists("gene"))
    lookupDoMiddle();
else
    directDoMiddle();
fprintf(stdout, "\n");
}

void debugDoMiddle()
/* Run needleInHay on a fixed, built-in needle/haystack pair (forward
 * strand, cDNA stringency) so the alignment code can be exercised without
 * any CGI input. */
{
fprintf(stdout, "\n");
needleInHay(
    "ggcacgagggtatctcaccgactctgccttccatctcaaaccaggacaca"
    "cacacactctctctctctctctctctctctctctctgtctctctgtctct"
    "ctctctttcgggcatttgtccccagagagtgcctagagacttcacagcct"
    "tggccctggaaacccctagacagccgctatgttgccaggcacggctctgg"
    "gcactgaggctacagcaatgaaaaaatcagccaagttctctgccttcatg"
    "gtgctcacattctaggcagagaaagacagatgatcaacaagtgaaaaaat"
    "cataaagctcaggtcatggtgtggcaagtattagagtggagagcgatggg"
    "gtggggtgggggcgctgttttatatggggtggtccaaaaatatcttggtg"
    "aggtggtgacatttgagtggaaacctggacagcaagaagctagtcgtgct"
    "ttggggtcaaaaggactccaaaatttcagttttttaaatggaaaacatgt"
    "gtttacccataaacattaaagagcagggaaattag"
    ,
    "gaggcaggcctaggcctgggctcccagcttggggcagcagagcagatccc"
    "ttcaagggagaaaccacagatatgccccagcctctccttgatgctgtgag"
    "tcaggggtgcttagaaaggctcgtgttcagttccaaatgcccagggtcac"
    "cacgaaggaggtgctgccccctcccctgcaccccaagcaacctgcatctg"
    "catggccctggagaggccattgctcctgatttccctcaggaaacatggcc"
    "agggagctgctgtgagagttttccccgagtccccacctccctgagatgta"
    "caatgagggaagggaagaggtatctcaccgacttctgcccttccatctca"
    "aacaaagacacacacacactctctctctctctctctctctctctctctct"
    "gtctctctgtctctctctctttctggcatttgtccccagagagtgcctag"
    "agacttcacagccttggccctggaaacccctagacagccgctatgttgcc"
    "aggcacggctctgggcactgaggctacagcaatgaaaaaatcagccaagt"
    "tctctgccttcatggtgctcacattctaggcagagaaagacagatgatca"
    "acaagtgaaaaaatcataaagctcaggtcatggtgtggcaagtattagag"
    "tggagagcgatggggtggggtgggggcgctgttttatatggggtggtcca"
    "aaaatatcttggtgaggtggtgacatttgagtggaaacctggacagcaag"
    "aagctagtcgtgctttggggtcaaaaggactccaaaatttcagtttttta"
    "aatggaaaacatgtgtttacccataaacattaaagagcagggaaattaga"
    "actatgtttctggagcacctattatatgtctagcaccatgatggaaaatt"
    "catagacatcatttcccaccgccctcttcagaaccctgtgtgtcacatag"
    "cattgttttcattttacagatatcaaatgagaaatccatagagattattt"
    "cacgcatttaataagatttattgagtattcacctctgaaaacactgtaat"
    "aaatcccatacggaccctgccctcctagagcctacagtctgtgacagcga"
    "ggagaacggtca",
    svForward, ffCdna, "debugNeedle", "debugHaystack", 0);
fprintf(stdout, "\n");
}

int main(int argc, char *argv[])
/* Program entry point.  With exactly one command-line argument, run the
 * built-in debug alignment (the argument's value itself is not used);
 * otherwise hand doMiddle to htmShell under the title
 * "FuzzyFinder Results". */
{
dnaUtilOpen();
if (argc == 2)
    debugDoMiddle();
else
    htmShell("FuzzyFinder Results", doMiddle, NULL);
carefulCheckHeap();
return 0;
}