/* fixalt - fix alt splice catalog - which has 3' and 5' splice sites
 * mixed up on minus strand. */

#include "common.h"
#include "cda.h"
#include "wormdna.h"
#include "htmshell.h"

struct altSpec
/* One line of the alt-splicing catalog. */
    {
    struct altSpec *next;       /* Next in list. */
    char *hrefTag;              /* Tag removed from the line by eatHrefTag(). */
    char *orfName;              /* First word of the line. */
    boolean skipExon, skipIntron, nonGenomic;  /* Written out as EX, IN, NG. */
    int alt3, alt5;             /* Written out as 3'(n) and 5'(n); 0 means absent. */
    int ieOverlap, iiOverlap;   /* Two integer columns of the line. */
    struct slName *cdna;        /* Names written at the end of the line. */
    };

char *eatHrefTag(char *line)
/* Cut the first <...> tag out of line, closing the gap in place, and return
 * the result of cloneStringZ() on the tag text (angle brackets included).
 * Aborts via errAbort() if line holds no complete tag. */
{
char *startTag = strchr(line, '<');
char *endTag = (startTag != NULL ? strchr(startTag, '>') : NULL);
char *hrefTag;

if (endTag == NULL)
    errAbort("No <...> tag found in '%s'", line);
hrefTag = cloneStringZ(startTag, endTag-startTag+1);
/* The tail is shifted left inside the same buffer, so source and destination
 * overlap: strcpy() is undefined for that, memmove() is not.  The +1 carries
 * the terminating zero along. */
memmove(startTag, endTag+1, strlen(endTag+1)+1);
return hrefTag;
}

/* NOTE(review): the text from here through the closing brace before boos()
 * is damaged in this copy of the file and is reproduced exactly as found.
 * It looks as if everything from a '<' that is directly followed by a letter
 * up to the next '>' was stripped (presumably by an HTML tag remover):
 *   - the five strings in junks[] are empty,
 *   - the for-loop header in skipJunkTag() breaks off after "i"; the rest of
 *     skipJunkTag(), all of unjunkLine() (still called below) and the
 *     beginning of readAlts() (still called from main()) are gone,
 *   - the loop headers "for (i=1; i" and "for (i=i+2; i" are cut short.
 * None of the missing text can be recovered from what is visible here;
 * restore it from a pristine copy of the file instead of guessing. */
char *skipJunkTag(char *pt)
{
static char *junks[] = {"", "", "", "", "" };
pt = skipLeadingSpaces(pt);
if (pt[0] == '<')
    {
    int len;
    int i;
    for (i=0; i", lineBuf, 7) == 0)
        break;
    lineLen = strlen(lineBuf);
    if (lineLen < 3)
        continue;
    if (lineLen < 56)
        errAbort("Short line %d of %s\n", lineCount, fileName);
    if (lineLen == sizeof(lineBuf)-1)
        errAbort("Line longer than %d chars line %d of %s, oops.", lineLen, lineCount, fileName);
    linePt = unjunkLine(lineBuf);
    AllocVar(el);
    el->hrefTag = eatHrefTag(linePt);
    wordCount = chopLine(linePt, words);
    el->orfName = cloneString(words[0]);
    for (i=1; iskipExon = TRUE;
        else if (sameString(s, "IN"))
            el->skipIntron = TRUE;
        else if (sameString(s, "NG"))
            el->nonGenomic = TRUE;
        else if (s[1] == '\'')
            {
            if (s[0] == '3')
                el->alt3 = atoi(s+3);
            else if (s[0] == '5')
                el->alt5 = atoi(s+3);
            else
                errAbort("%s??? line %d of %s", s, lineCount, fileName);
            }
        else
            break;
        }
    if (i + 3 > wordCount)
        errAbort("Short line %d of %s", lineCount, fileName);
    el->ieOverlap = atoi(words[i]);
    el->iiOverlap = atoi(words[i+1]);
    for (i=i+2; icdna, newSlName(words[i]));
    slAddHead(&list, el);
    }
slReverse(&list);
fclose(f);
return list;
}

char *boos(boolean boo, char *trues, char *falses)
/* Return string reflecting boolean value. */
{
return boo ?
    trues : falses;
}

char *alt3(struct altSpec *alt)
/* Return the text for the 3' column of alt: "3'(n)" when alt3 is the only
 * thing set (no skipExon, no skipIntron, no alt5), otherwise " ".
 * The formatted text lives in a static buffer that the next call overwrites. */
{
/* 16 bytes hold "3'(" + the longest possible int + ")" + the zero. */
static char buf[16];
if (alt->skipExon || alt->skipIntron || alt->alt5 || alt->alt3 == 0)
    return " ";
else
    {
    snprintf(buf, sizeof(buf), "3'(%d)", alt->alt3);
    return buf;
    }
}

char *alt5(struct altSpec *alt)
/* Return the text for the 5' column of alt: "5'(n)" when alt5 is the only
 * thing set (no skipExon, no skipIntron, no alt3), otherwise " ".
 * The formatted text lives in a static buffer that the next call overwrites. */
{
/* 16 bytes hold "5'(" + the longest possible int + ")" + the zero. */
static char buf[16];
if (alt->skipExon || alt->skipIntron || alt->alt3 || alt->alt5 == 0)
    return " ";
else
    {
    snprintf(buf, sizeof(buf), "5'(%d)", alt->alt5);
    return buf;
    }
}

void writeAlts(char *fileName, struct altSpec *altList)
/* Write altList to fileName as a stand-alone html file, one line per entry.
 * Aborts if the file cannot be closed cleanly. */
{
FILE *f = mustOpen(fileName, "w");
struct altSpec *alt;
struct slName *cdna;

/* Write the start of a stand alone .html file. */
htmStart(f, "Fixed Alt-Splicing Catalog");
/* NOTE(review): the next literal (and the matching one after the loop) spans
 * a physical line break in this copy, which is not valid C.  Markup was
 * probably stripped from inside the quotes; kept as found - restore from a
 * pristine copy. */
fprintf(f, "
");
fprintf(f, "-----------------------------\n");

for (alt = altList; alt != NULL; alt = alt->next)
    {
    fprintf(f, "%s%-13s   %s   %s   %-6s %-6s  %s %5d %5d  ",
        alt->hrefTag, alt->orfName, 
        boos(alt->skipExon, "EX", "  "), boos(alt->skipIntron, "IN", "  "),
        alt5(alt), alt3(alt), boos(alt->nonGenomic, "NG", "  "),  
        alt->ieOverlap, alt->iiOverlap);
    for (cdna = alt->cdna; cdna != NULL; cdna = cdna->next)
        fprintf(f, " %s", cdna->name);
    fputc('\n', f);
    }
/* Write the end of a stand-alone html file */
fprintf(f, "
");
htmEnd(f);
/* fclose() flushes buffered output, so a failure here means lost data. */
if (fclose(f) != 0)
    errAbort("Error closing %s", fileName);
}

boolean isReverseStrand(char *htag, char *orfName)
/* Return TRUE unless a strict majority of the cDNA alignments in the range
 * named by htag count as plus strand (so a tie, or no alignments at all,
 * gives TRUE).  htag is split on '=', its third piece is split on '&', and
 * the first part of that is handed to wormParseChromRange().  Warns, naming
 * orfName, when the losing side has more than a fifth of the alignments.
 * Aborts if htag is too long or cannot be parsed. */
{
char buf[1024];
char *words[100];
int wordCount;
char *parts[16];
int partCount;
char *chrom;
int start, end;
int plusCount = 0, minusCount = 0;
int totalCount;
struct cdaAli *aliList, *ali;
boolean isRev = FALSE;

/* chopString() is run on a private copy of the tag, which has to fit. */
if (strlen(htag) >= sizeof(buf))
    errAbort("Tag for %s is longer than %d chars", orfName, (int)sizeof(buf)-1);
strcpy(buf, htag);
wordCount = chopString(buf, "=", words, ArraySize(words));
/* Checked with errAbort rather than assert so that the checks survive a
 * build with NDEBUG defined. */
if (wordCount <= 3)
    errAbort("Can't find chromosome range in tag %s", htag);
partCount = chopString(words[2], "&", parts, ArraySize(parts));
if (partCount < 1 || !wormParseChromRange(parts[0], &chrom, &start, &end))
    errAbort("Can't parse chromosome range in tag %s", htag);

aliList = wormCdaAlisInRange(chrom, start, end);
for (ali = aliList; ali != NULL; ali = ali->next)
    {
    /* Presumably '>' marks a plus-strand alignment - confirm against cda.h. */
    if (cdaDirChar(ali, '+') == '>')
        ++plusCount;
    else
        ++minusCount;
    }
totalCount = plusCount + minusCount;
if (plusCount > minusCount)
    {
    if (minusCount * 5 > totalCount)
        warn("Please double-check %s", orfName);
    }
else
    {
    if (plusCount * 5 > totalCount)
        warn("Please double-check %s", orfName);
    isRev = TRUE;
    }
cdaFreeAliList(&aliList);
return isRev;
}

void fixAlts(struct altSpec *altList)
/* Swap alt3 and alt5 on every entry of altList that has either one set and
 * for which isReverseStrand() returns TRUE.  Prints the counts when done. */
{
struct altSpec *alt;
int reverseCount = 0;
int inspectCount = 0;
int totalCount = 0;

for (alt = altList; alt != NULL; alt = alt->next)
    {
    ++totalCount;
    if (alt->alt3 != 0 || alt->alt5 != 0)
        {
        ++inspectCount;
        if (isReverseStrand(alt->hrefTag, alt->orfName))
            {
            int temp = alt->alt3;
            alt->alt3 = alt->alt5;
            alt->alt5 = temp;
            ++reverseCount;
            }
        }
    }
printf("Reversed %d of %d 3'/5' (%d total)\n", reverseCount, inspectCount, totalCount);
}

int main(int argc, char *argv[])
/* Read the catalog named by argv[1], fix it, write it to argv[2]. */
{
char *oldName, *newName;
struct altSpec *altList;

if (argc != 3)
    {
    errAbort("fixalt - fix minus strand 3'/5' mixup on alts file.\n"
             "Usage:\n"
             "    fixalt alt.html newAlt.html\n");
    }
oldName = argv[1];
newName = argv[2];
printf("Reading %s\n", oldName);
altList = readAlts(oldName);
fixAlts(altList);
printf("Writing %s\n", newName);
writeAlts(newName, altList);
return 0;
}