#include "common.h"
#include "linefile.h"
#include "hash.h"
#include "jksql.h"
#include "hdb.h"
#include "web.h"
#include "cdsEvidence.h"
#include "txInfo.h"
#include "trackDb.h"
#include "hgc.h"

void showTxInfo(char *geneName, struct trackDb *tdb, char *txInfoTable)
/* Print the "Transcript Information" section for geneName.  If txInfoTable
 * exists and has a row whose name matches geneName, the first such row is
 * shown as a table of label/value cells (three pairs per table row).  A short
 * explanatory paragraph is printed afterwards.  tdb is not used here. */
{
struct sqlConnection *conn = hAllocConn(database);
if (sqlTableExists(conn, txInfoTable))
    {
    char query[512];
    /* NOTE(review): geneName is pasted into the SQL text unescaped.  Confirm
     * callers only pass trusted identifiers, or move to an escaping query
     * formatter if the library provides one. */
    safef(query, sizeof(query), "select * from %s where name='%s'",
        txInfoTable, geneName);
    struct sqlResult *sr = sqlGetResult(conn, query);
    char **row;
    /* Only the first matching row is displayed. */
    if ((row = sqlNextRow(sr)) != NULL)
        {
        struct txInfo *info = txInfoLoad(row);
        webNewSection("Transcript Information");
        webPrintLinkTableStart();

        webPrintLinkCell("category:");
        webPrintLinkCell(info->category);
        webPrintLinkCell("nonsense-mediated-decay:");
        webPrintLinkCell(info->nonsenseMediatedDecay ? "yes" : "no");
        webPrintLinkCell("RNA accession:");
        webPrintLinkCell(info->sourceAcc);
        webPrintLinkTableNewRow();

        webPrintLinkCell("exon count:");
        webPrintIntCell(info->exonCount);
        webPrintLinkCell("CDS single in 3' UTR:");
        webPrintLinkCell(info->cdsSingleInUtr3 ? "yes" : "no");
        webPrintLinkCell("RNA size:");
        webPrintIntCell(info->sourceSize);
        webPrintLinkTableNewRow();

        webPrintLinkCell("ORF size:");
        webPrintIntCell(info->orfSize);
        webPrintLinkCell("CDS single in intron:");
        webPrintLinkCell(info->cdsSingleInIntron ? "yes" : "no");
        webPrintLinkCell("Alignment % ID:");
        webPrintDoubleCell(info->aliIdRatio*100);   /* ratio shown as a percentage */
        webPrintLinkTableNewRow();

        webPrintLinkCell("txCdsPredict score:");
        webPrintDoubleCell(info->cdsScore);
        webPrintLinkCell("frame shift in genome:");
        webPrintLinkCell(info->genomicFrameShift ? "yes" : "no");
        webPrintLinkCell("% Coverage:");
        webPrintDoubleCell(info->aliCoverage*100);  /* ratio shown as a percentage */
        webPrintLinkTableNewRow();

        webPrintLinkCell("has start codon:");
        webPrintLinkCell(info->startComplete ? "yes" : "no");
        webPrintLinkCell("stop codon in genome:");
        webPrintLinkCell(info->genomicStop ? "yes" : "no");
        webPrintLinkCell("# of Alignments:");
        webPrintIntCell(info->genoMapCount);
        webPrintLinkTableNewRow();

        webPrintLinkCell("has end codon:");
        webPrintLinkCell(info->endComplete ? "yes" : "no");
        webPrintLinkCell("retained intron:");
        webPrintLinkCell(info->retainedIntron ? "yes" : "no");
        webPrintLinkCell("# AT/AC introns");
        webPrintIntCell(info->atacIntrons);
        webPrintLinkTableNewRow();

        webPrintLinkCell("selenocysteine:");
        webPrintLinkCell(info->selenocysteine ? "yes" : "no");
        webPrintLinkCell("end bleed into intron:");
        webPrintIntCell(info->bleedIntoIntron);
        webPrintLinkCell("# strange splices:");
        webPrintIntCell(info->strangeSplice);
        webPrintLinkTableEnd();

        txInfoFree(&info);
        }
    sqlFreeResult(&sr);
    }
hFreeConn(&conn);

/* NOTE(review): this paragraph is printed even when the table is missing or
 * has no row for geneName, although it refers to "the table above".  Kept
 * unconditional to preserve existing output; confirm whether that is
 * intended. */
printf("%s",
    "The table above summarizes many aspects of this transcript. Here is a more \n"
    "detailed description of each of the fields than can fit in the label. Also\n"
    "see the CDS Prediction Information table below for additional information\n"
    "relevant to the predicted protein product if any.\n"
    "\n"
    "\n");
}

void showCdsEvidence(char *geneName, struct trackDb *tdb, char *evTable)
/* Print the "CDS Prediction Information" section for geneName.  If evTable
 * exists, every row whose name matches geneName is listed, highest score
 * first, followed by an explanation of how the scores are used.  When there
 * are no matching rows a one-line "no significant CDS prediction" message is
 * printed instead.  Nothing is printed if evTable does not exist.  tdb is not
 * used here. */
{
struct sqlConnection *conn = hAllocConn(database);
if (sqlTableExists(conn, evTable))
    {
    webNewSection("CDS Prediction Information");
    char query[512];
    /* NOTE(review): geneName is pasted into the SQL text unescaped.  Confirm
     * callers only pass trusted identifiers, or move to an escaping query
     * formatter if the library provides one. */
    safef(query, sizeof(query),
        "select count(*) from %s where name='%s'", evTable, geneName);
    if (sqlQuickNum(conn, query) > 0)
        {
        safef(query, sizeof(query),
            "select * from %s where name='%s' order by score desc",
            evTable, geneName);
        struct sqlResult *sr = sqlGetResult(conn, query);
        char **row;

        /* Column headers; <BR> splits the two-word labels inside the HTML cell. */
        webPrintLinkTableStart();
        webPrintLabelCell("ORF<BR>size");
        webPrintLabelCell("start in<BR>transcript");
        webPrintLabelCell("end in<BR>transcript");
        webPrintLabelCell("source");
        webPrintLabelCell("accession");
        webPrintLabelCell("ad-hoc<BR>score");
        webPrintLabelCell("start<BR>codon");
        webPrintLabelCell("end<BR>codon");
        webPrintLabelCell("piece<BR>count");
        webPrintLabelCell("piece list");
        webPrintLabelCell("frame");
        webPrintLinkTableNewRow();

        while ((row = sqlNextRow(sr)) != NULL)
            {
            struct cdsEvidence *ev = cdsEvidenceLoad(row);
            int i;

            webPrintIntCell(ev->end - ev->start);
            webPrintIntCell(ev->start+1);   /* start is printed one higher than stored */
            webPrintIntCell(ev->end);
            webPrintLinkCell(ev->source);
            webPrintLinkCell(ev->accession);

            webPrintLinkCellRightStart();
            printf("%3.2f", ev->score);
            webPrintLinkCellEnd();

            webPrintLinkCell(ev->startComplete ? "yes" : "no");
            webPrintLinkCell(ev->endComplete ? "yes" : "no");
            webPrintIntCell(ev->cdsCount);

            /* Piece list: one "start-end" pair per CDS piece. */
            webPrintLinkCellRightStart();
            for (i = 0; i < ev->cdsCount; ++i)
                {
                int start = ev->cdsStarts[i];
                int end = start + ev->cdsSizes[i];
                printf("%d-%d ", start+1, end);
                }
            webPrintLinkCellEnd();

            /* Frame of each piece (1..3), comma separated. */
            webPrintLinkCellRightStart();
            for (i = 0; i < ev->cdsCount; ++i)
                {
                if (i > 0)
                    printf(",");
                printf("%d", ev->cdsStarts[i]%3 + 1);
                }
            webPrintLinkCellEnd();
            webPrintLinkTableNewRow();

            /* Release the loaded row, mirroring txInfoLoad/txInfoFree in showTxInfo. */
            cdsEvidenceFree(&ev);
            }
        sqlFreeResult(&sr);
        webPrintLinkTableEnd();

        printf("This table shows CDS predictions for this transcript from a number of "
            "sources including alignments against UniProtKB proteins, alignments against Genbank "
            "mRNAs with CDS regions annotated by the sequence submitter, and "
            "Victor Solovyev's bestorf program. Each prediction is assigned an ad-hoc score. "
            "The score is based on several factors including the quality of "
            "any associated alignments, the quality of the source, and the length of the "
            "prediction. For RefSeq transcripts with annotated CDSs the ad-hoc score "
            "is over a million unless there are severe problems mapping the mRNA to the "
            "genome. In other cases the score generally ranges from 0 to 50,000. "
            "The highest scoring prediction in this table is used to define the CDS "
            "boundaries for this transcript.<P>If no score is 2000 or more, the transcript "
            "is considered non-coding. In cases where the CDS is subject to "
            "nonsense-mediated decay the CDS is removed. The CDS is also removed "
            "from transcripts when evidence points to it being in an artifact of an "
            "incompletely processed transcript. Specifically if the CDS is entirely "
            "enclosed in the 3' UTR or an intron of a refSeq or other high quality "
            "transcript, the CDS is removed.");
        }
    else
        {
        printf("no significant CDS prediction found, likely %s is noncoding",
            geneName);
        }
    }
hFreeConn(&conn);
}