/* ccdsClick - click handling for CCDS track and related functions */ /* NOTE(review): in this copy several printf literals look truncated (HTML markup and some calls between the pieces appear to be missing) and table text is displaced onto separate lines - confirm against the upstream file before building. */ #include "common.h" #include "hgc.h" #include "ccdsClick.h" #include "ccdsInfo.h" #include "ccdsNotes.h" #include "ccdsGeneMap.h" #include "geneSimilarities.h" #include "genbank.h" #include "genePred.h" #include "genePredReader.h" #include "ensFace.h" #include "mgcClick.h" #include "htmshell.h" static struct ccdsInfo *getCcdsInfoForSrcDb(struct sqlConnection *conn, char *acc) /* Look up the ccdsInfo entry for a RefSeq, Ensembl, or Vega accession via ccdsInfoSelectByMrna(). Returns NULL when the ccdsInfo table does not exist in this database; otherwise returns whatever ccdsInfoSelectByMrna() returns for acc. */ { if (sqlTableExists(conn, "ccdsInfo")) return ccdsInfoSelectByMrna(conn, acc); else return NULL; } void printCcdsExtUrl(char *ccdsId) /* Print (via printf) the URL of the CCDS database page at NCBI for ccdsId. Only the bare URL is written, and ccdsId is inserted as given. */ { printf("http://www.ncbi.nlm.nih.gov/CCDS/CcdsBrowse.cgi?REQUEST=CCDS&BUILDS=ALLBUILDS&DATA=%s", ccdsId); } static void printCcdsUrlForSrcDb(struct sqlConnection *conn, struct ccdsInfo *ccdsInfo) /* Print the hgc URL for the CCDS named in ccdsInfo->ccds, using the cart session string, seqName, the current window and database. conn is not used. Note that winStart is supplied for both the o= and l= parameters. */ { printf("../cgi-bin/hgc?%s&g=ccdsGene&i=%s&c=%s&o=%d&l=%d&r=%d&db=%s", cartSidUrlString(cart), ccdsInfo->ccds, seqName, winStart, winStart, winEnd, database); } void printCcdsForSrcDb(struct sqlConnection *conn, char *acc) /* Print a CCDS entry for a RefSeq, Ensembl, or Vega gene when getCcdsInfoForSrcDb() finds one for acc; print nothing otherwise. NOTE(review): the returned ccdsInfo is not freed here, and the initializer ends with a redundant second semicolon. */ { struct ccdsInfo *ccdsInfo = getCcdsInfoForSrcDb(conn, acc);; if (ccdsInfo != NULL) { printf("CCDS: %s
", ccdsInfo->ccds); } } struct ccdsGeneMap *getCcdsGenesForMappedGene(struct sqlConnection *conn, char *acc, char *mapTable) /* Get the list of CCDS genes that mapTable associates with gene acc within the current window (seqName, winStart, winEnd), sorted with ccdsGeneMapCcdsIdCmp. The list stays NULL when mapTable or ccdsInfo does not exist. The final 0.0 argument is presumably a similarity threshold - confirm against ccdsGeneMapSelectByGeneOver(). */ { struct ccdsGeneMap *ccdsGenes = NULL; if (sqlTableExists(conn, mapTable) && sqlTableExists(conn, "ccdsInfo")) ccdsGenes = ccdsGeneMapSelectByGeneOver(conn, mapTable, acc, seqName, winStart, winEnd, 0.0); slSort(&ccdsGenes, ccdsGeneMapCcdsIdCmp); return ccdsGenes; } void printCcdsUrl(struct sqlConnection *conn, char *ccdsId) /* Print the hgc URL for ccdsId on the ccdsGene track, using the cart session string, seqName, the current window and database. conn is not used. */ { printf("../cgi-bin/hgc?%s&g=ccdsGene&i=%s&c=%s&o=%d&l=%d&r=%d&db=%s", cartSidUrlString(cart), ccdsId, seqName, winStart, winStart, winEnd, database); } void printCcdsForMappedGene(struct sqlConnection *conn, char *acc, char *mapTable) /* Print a comma-separated list of CCDS ids for a gene mapped via a ccdsGeneMap table; print nothing when getCcdsGenesForMappedGene() returns NULL. NOTE(review): the ccdsGenes list is not freed here. */ { struct ccdsGeneMap *ccdsGenes = getCcdsGenesForMappedGene(conn, acc, mapTable); if (ccdsGenes != NULL) { printf("CCDS: "); struct ccdsGeneMap *gene; for (gene = ccdsGenes; gene != NULL; gene = gene->next) { if (gene != ccdsGenes) printf(", "); printf("ccdsId); printf("\">%s", gene->ccdsId); } printf("
\n"); } } static char *getCcdsGeneSymbol(struct sqlConnection *conn, struct ccdsInfo *rsCcds) /* Get the gene name for a CCDS: walk rsCcds and return the refLink name for the first entry whose mrnaAcc (after genbankDropVer) has a row, or NULL if none does. The string comes from sqlQuickString(); the caller in this file releases it with freez(). */ { struct ccdsInfo *ci; char accBuf[GENBANK_ACC_BUFSZ], query[256]; char *geneSym = NULL; for (ci = rsCcds; ci != NULL; ci = ci->next) { safef(query, sizeof(query), "select name from refLink where mrnaAcc='%s'", genbankDropVer(accBuf, ci->mrnaAcc)); geneSym = sqlQuickString(conn, query); if (geneSym != NULL) return geneSym; } return NULL; } static char *getCcdsRefSeqSummary(struct sqlConnection *conn, struct ccdsInfo *rsCcds) /* Get the RefSeq summary for a CCDS: return the first non-NULL result of getRefSeqSummary() over the entries of rsCcds (mrnaAcc passed through genbankDropVer), or NULL if none has a summary. The caller in this file releases it with freez(). */ { struct ccdsInfo *ci; char accBuf[GENBANK_ACC_BUFSZ]; char *summary = NULL; for (ci = rsCcds; ci != NULL; ci = ci->next) { summary = getRefSeqSummary(conn, genbankDropVer(accBuf, ci->mrnaAcc)); if (summary != NULL) return summary; } return NULL; } static struct ccdsGeneMap *ccdsGetGenes(struct sqlConnection *conn, char *mapTable, char *ccdsId) /* Get ccdsGeneMap objects for a ccdsId from mapTable, restricted to the current chromosome (seqName). Returns only the best overlapping ones: those whose cdsSimilarity equals the highest cdsSimilarity found, 
with only one entry kept per gene id. Returns NULL when no rows are found. Entries that are not returned are freed here. */ { struct ccdsGeneMap *ccdsGenes = NULL, *bestCcdsGenes = NULL, *ccdsGene; /* filter by chrom due to PAR */ ccdsGenes = ccdsGeneMapSelectByCcds(conn, mapTable, ccdsId, seqName, 0.0); if (ccdsGenes == NULL) return NULL; bestCcdsGenes = slPopHead(&ccdsGenes); /* seed with first */ while ((ccdsGene = slPopHead(&ccdsGenes)) != NULL) { if (ccdsGene->cdsSimilarity == bestCcdsGenes->cdsSimilarity) { /* same as best, keep */ slAddHead(&bestCcdsGenes, ccdsGene); } else if (ccdsGene->cdsSimilarity > bestCcdsGenes->cdsSimilarity) { /* new best, replace list */ ccdsGeneMapFreeList(&bestCcdsGenes); bestCcdsGenes = ccdsGene; } else { /* worse, drop */ ccdsGeneMapFree(&ccdsGene); } } /* only keep one of each gene */ slUniqify(&bestCcdsGenes, ccdsGeneMapGeneIdCmp, ccdsGeneMapFree); return bestCcdsGenes; } static void printCcdsHgGeneUrl(struct sqlConnection *conn, char *ccdsId, char* kgId) /* Output a URL to hgGene for known gene kgId corresponding to ccdsId. Reads the single ccdsGene row for ccdsId on seqName, then the single knownGene row for kgId on the same strand within that CCDS txStart-txEnd range, and uses the knownGene txStart and txEnd in the URL. Calls errAbort() when either lookup yields no row or more than one row. NOTE(review): both where clauses are built by formatting ccdsId and kgId directly into SQL - confirm these values are trusted. */ { char where[128]; struct genePredReader *gpr; struct genePred *ccdsGene = NULL, *kgGene = NULL; /* get ccds genePred to get location */ safef(where, sizeof(where), "chrom = '%s' and name = '%s'", seqName, ccdsId); gpr = genePredReaderQuery(conn, "ccdsGene", where); ccdsGene = genePredReaderAll(gpr); genePredReaderFree(&gpr); if (ccdsGene == NULL) errAbort("%s not found in ccdsGene table for chrom %s", ccdsId, seqName); else if (ccdsGene->next != NULL) errAbort("multiple %s rows found in ccdsGene table for chrom %s", ccdsId, seqName); /* get KG genePred, as need exact location for link */ safef(where, sizeof(where), "name = '%s' and strand = '%s'", kgId, ccdsGene->strand); gpr = genePredReaderRangeQuery(conn, "knownGene", seqName, ccdsGene->txStart, ccdsGene->txEnd, where); kgGene = genePredReaderAll(gpr); genePredReaderFree(&gpr); if (kgGene == NULL) errAbort("%s not found in knownGene table for chrom %s", kgId, seqName); else if (kgGene->next != NULL) errAbort("multiple %s rows found in knownGene table for chrom %s", kgId, seqName); 
printf("../cgi-bin/hgGene?%s&%s=%s&%s=%s&%s=%s&%s=%d&%s=%d", cartSidUrlString(cart), "db", database, "hgg_gene", kgId, "hgg_chrom", seqName, "hgg_start", kgGene->txStart, "hgg_end", kgGene->txEnd); genePredFree(&ccdsGene); genePredFree(&kgGene); } static void ccdsNcbiRows(char *ccdsId, struct ccdsInfo *rsCcds) /* Output the RefSeq CCDS entries: one row per element of rsCcds showing mrnaAcc and protAcc, with the RefSeq label emitted only for the first element. ccdsId is not used in the visible body. NOTE(review): the literals here look truncated in this copy (slCount() is passed with no matching format specifier) - confirm against upstream. */ { struct ccdsInfo *ci; for (ci = rsCcds; ci != NULL; ci = ci->next) { printf(""); if (ci == rsCcds) printf("RefSeq", slCount(rsCcds)); printf("mrnaAcc); printf("\" TARGET=_blank>%s ", ci->mrnaAcc); printf("protAcc); printf("\" TARGET=_blank>%s", ci->protAcc); printf("\n"); } } static void ccdsHinxtonRows(char *ccdsId, bool isVega,struct ccdsInfo *hinCcds) /* Output the Ensembl or Vega CCDS entries: one row per element of hinCcds. isVega selects the Vega label instead of Ensembl and sets dbArg to &db=vega (empty otherwise). Calls errAbort() when ensOrgNameFromScientificName() returns NULL for scientificName. */ { struct ccdsInfo *ci; char *dbArg = isVega ? "&db=vega" : ""; char *ensGenome = ensOrgNameFromScientificName(scientificName); if (ensGenome == NULL) errAbort("%s: ensOrgNameFromScientificName failed", ccdsId); for (ci = hinCcds; ci != NULL; ci = ci->next) { printf(""); if (ci == hinCcds) printf("%s", slCount(hinCcds), (isVega ? "Vega" : "Ensembl")); printf("%s ", ensGenome, ci->mrnaAcc, dbArg, ci->mrnaAcc); printf("%s", ensGenome, ci->protAcc, dbArg, ci->protAcc); printf("\n"); } } static void ccdsKnownGenesRows(struct sqlConnection *conn, char *ccdsId) /* Output the known genes mapped to the CCDS: one row per entry returned by ccdsGetGenes() for the ccdsKgMap table, with the UCSC Genes label emitted only for the first entry. NOTE(review): spId is fetched and freed but not otherwise used in the visible body, and the ccdsKgs list is not freed. */ { struct ccdsGeneMap *ccdsKgs = ccdsGetGenes(conn, "ccdsKgMap", ccdsId); struct ccdsGeneMap *ccdsKg; for (ccdsKg = ccdsKgs; ccdsKg != NULL; ccdsKg = ccdsKg->next) { char *spId = kgIdToSpId(conn, ccdsKg->geneId); printf(""); if (ccdsKg == ccdsKgs) printf("%s", slCount(ccdsKgs), "UCSC Genes"); printf("geneId); printf("\" TARGET=_blank>%s", ccdsKg->geneId); printf(" "); freez(&spId); printf("\n"); } } static void ccdsMgcRows(struct sqlConnection *conn, char *ccdsId) /* Output the MGC genes mapped to the CCDS: one row per gene in the first geneSimilarities element built for ccdsId between ccdsGene and mgcGenes, with the MGC label emitted only for the first gene. NOTE(review): geneSims is dereferenced without a NULL check - confirm geneSimilaritiesBuildAll() cannot return an empty list here. */ { // only possible to get multiple CCDS genePreds in PAR, since we are linking // to details, not browser, only use the first set. 
struct geneSimilarities *geneSims = geneSimilaritiesBuildAll(conn, TRUE, ccdsId, "ccdsGene", "mgcGenes"); struct geneSim *mgc; for (mgc = geneSims->genes; mgc != NULL; mgc = mgc->next) { printf(""); if (mgc == geneSims->genes) printf("MGC", slCount(geneSims->genes)); printf("gene->name, mgc->gene->txStart); printf("\">%s", mgc->gene->name); printf(" \n"); } geneSimilaritiesFreeList(&geneSims); } static void writeBasicInfoHtml(struct sqlConnection *conn, char *ccdsId, struct ccdsInfo *rsCcds, struct ccdsInfo *vegaCcds, struct ccdsInfo *ensCcds) /* Write the table with basic information about the CCDS (2 columns): gene symbol and description when available, then the CCDS sequence links and the CCDS database entry. rsCcds must be non-NULL because rsCcds->mrnaAcc is read. vegaCcds and ensCcds are not used in the visible body. */ { { printf("\n"); /* gene symbol */ char *geneSym = getCcdsGeneSymbol(conn, rsCcds); if (geneSym != NULL) printf("\n", geneSym); freez(&geneSym); /* description */ char *desc = hGenBankGetDesc(database, rsCcds->mrnaAcc, TRUE); if (desc != NULL) printf("\n", desc); freez(&desc); /* CCDS sequence links */ printf("\n"); printf("\n"); /* CCDS databases */ printf("\n"); printf("\n"); printf("
Gene%s
Description%s
Sequences"); printf(""); hgcAnchorSomewhere("htcGeneMrna", ccdsId, "ccdsGene", seqName); printf("CDS,  "); hgcAnchorSomewhere("htcTranslatedPredMRna", ccdsId, "translate", seqName); printf("protein,  "); hgcAnchorSomewhere( "htcGeneInGenome", ccdsId, "ccdsGene", seqName); printf("genomic"); printf("
CCDS database %s", ccdsId); printf("
\n"); printf("
\n"); } } static void writeLinksHtml(struct sqlConnection *conn, char *ccdsId, struct ccdsInfo *rsCcds, struct ccdsInfo *vegaCcds, struct ccdsInfo *ensCcds) /* Write the table with links to other browser apps or external databases (3 columns). Row groups are emitted in this order: known genes (only if the ccdsKgMap table exists), RefSeq, Vega (if vegaCcds is non-NULL), Ensembl (if ensCcds is non-NULL), MGC (only if the mgcGenes table exists). */ { printf("

Associated Sequences

\n"); printf("\n"); printf("\n"); printf("\n"); printf("\n"); if (sqlTableExists(conn, "ccdsKgMap")) ccdsKnownGenesRows(conn, ccdsId); ccdsNcbiRows(ccdsId, rsCcds); if (vegaCcds != NULL) ccdsHinxtonRows(ccdsId, TRUE, vegaCcds); if (ensCcds != NULL) ccdsHinxtonRows(ccdsId, FALSE, ensCcds); if (sqlTableExists(conn, "mgcGenes")) ccdsMgcRows(conn, ccdsId); printf("
 mRNAProtein
\n"); printf("

Note: mRNA and protein sequences in other gene collections " "may differ from the CCDS sequences.
\n"); } static void writePublicNotesHtml(struct sqlConnection *conn, char *ccdsId) /* Write the public annotator notes, if available: when the ccdsNotes table exists and has rows for ccdsId, write a horizontal line and one Date/Note entry per row ordered by createDate; otherwise write nothing. NOTE(review): the ccdsNotes list is not freed, and note->note is passed straight to printf - confirm whether it needs HTML escaping. */ { struct ccdsNotes *ccdsNotes = NULL; if (sqlTableExists(conn, "ccdsNotes")) ccdsNotes = sqlQueryObjs(conn, (sqlLoadFunc)ccdsNotesLoad, sqlQueryMulti, "select * from ccdsNotes where ccds = \"%s\" order by createDate", ccdsId); if (ccdsNotes != NULL) { htmlHorizontalLine(); printf("

Annotator Notes

\n"); printf("\n"); printf("\n"); printf("\n"); printf("\n"); struct ccdsNotes *note; for (note = ccdsNotes; note != NULL; note = note->next) printf("", note->createDate, note->note); printf("
DateNote
%s%s
\n"); } } static void writeRefSeqSummaryHtml(struct sqlConnection *conn, char *ccdsId, struct ccdsInfo *rsCcds) /* Write the RefSeq summary section, if a summary is available: when getCcdsRefSeqSummary() returns non-NULL for rsCcds, write a horizontal line, a heading naming ccdsId and the summary text, then free the summary. */ { char *summary = getCcdsRefSeqSummary(conn, rsCcds); if (summary != NULL) { htmlHorizontalLine(); printf("

RefSeq summary of %s

\n", ccdsId); printf("

%s

\n", summary); freez(&summary); } } void doCcdsGene(struct trackDb *tdb, char *ccdsId) /* Process click on a CCDS gene. Loads the NCBI, Vega and Ensembl ccdsInfo lists for ccdsId and calls errAbort() if there are no NCBI entries or if both the Vega and Ensembl lists are empty. Otherwise sorts the lists, writes the details page (basic info, links, public notes, RefSeq summary, track description), then frees the lists and the connection. */ { struct sqlConnection *conn = hAllocConn(database); struct ccdsInfo *rsCcds = ccdsInfoSelectByCcds(conn, ccdsId, ccdsInfoNcbi); struct ccdsInfo *vegaCcds = ccdsInfoSelectByCcds(conn, ccdsId, ccdsInfoVega); struct ccdsInfo *ensCcds = ccdsInfoSelectByCcds(conn, ccdsId, ccdsInfoEnsembl); if (rsCcds == NULL) errAbort("database inconsistency: no NCBI ccdsInfo entries found for %s", ccdsId); if ((vegaCcds == NULL) && (ensCcds == NULL)) errAbort("database inconsistency: no Hinxton ccdsInfo entries found for %s", ccdsId); ccdsInfoMRnaSort(&rsCcds); ccdsInfoMRnaSort(&vegaCcds); ccdsInfoMRnaSort(&ensCcds); cartWebStart(cart, database, "CCDS Gene"); printf("

Consensus CDS Gene %s

\n", ccdsId); writeBasicInfoHtml(conn, ccdsId, rsCcds, vegaCcds, ensCcds); writeLinksHtml(conn, ccdsId, rsCcds, vegaCcds, ensCcds); writePublicNotesHtml(conn, ccdsId); writeRefSeqSummaryHtml(conn, ccdsId, rsCcds); htmlHorizontalLine(); printTrackHtml(tdb); cartWebEnd(); ccdsInfoFreeList(&rsCcds); ccdsInfoFreeList(&vegaCcds); ccdsInfoFreeList(&ensCcds); hFreeConn(&conn); }