/* Synonym - print out other names for this gene.
 *
 * NOTE(review): the copy of this file under review had lost its line structure
 * and apparently its HTML markup (several string literals contained raw line
 * breaks, and link-building helpers were defined but never called).  Line
 * breaks are written here as <BR> and the hgc links are rebuilt from the
 * helpers and argument lists that survived; please confirm the markup against
 * the rendered page. */

#include "common.h"
#include "hash.h"
#include "hdb.h"
#include "linefile.h"
#include "dystring.h"
#include "hgGene.h"
#include "spDb.h"
#include "ccdsGeneMap.h"

static void printOurMrnaUrl(FILE *f, char *accession)
/* Write to f the URL of our own hgc details page for an mRNA accession,
 * positioned on the current gene.  Only the URL is written, no markup. */
{
fprintf(f, "../cgi-bin/hgc?%s&g=mrna&i=%s&c=%s&o=%d&t=%d&l=%d&r=%d&db=%s",
    cartSidUrlString(cart), accession, curGeneChrom, curGeneStart, curGeneEnd,
    curGeneStart, curGeneEnd, database);
}

static void printOurRefseqUrl(FILE *f, char *accession)
/* Write to f the URL of our own hgc details page for a RefSeq accession,
 * positioned on the current gene.  Only the URL is written, no markup. */
{
fprintf(f, "../cgi-bin/hgc?%s&g=refGene&i=%s&c=%s&o=%d&l=%d&r=%d&db=%s",
    cartSidUrlString(cart), accession, curGeneChrom, curGeneStart,
    curGeneStart, curGeneEnd, database);
}

static void printRefSeqAccession(char *refSeqAcc)
/* Print the "RefSeq Accession" line with the accession linked to its hgc page.
 * NOTE(review): the URL helper writes to a FILE *, so this assumes hPrintf
 * output also goes to stdout - confirm. */
{
hPrintf("RefSeq Accession: <A HREF=\"");
printOurRefseqUrl(stdout, refSeqAcc);
hPrintf("\">%s</A><BR>\n", refSeqAcc);
}

char *aliasString(char *id, struct sqlConnection *conn)
/* Return the alternate gene symbols for id as one HTML line, or NULL when
 * there are none.  Aliases equal to id itself or to curGeneName are left out.
 * The result is allocated; release it with freeMem. */
{
char query[256];
struct sqlResult *sr;
char **row;
struct dyString *aliasReturn = NULL;   /* created on the first alias we keep */

safef(query, sizeof(query),
    "select alias from kgAlias where kgId = '%s' order by alias", id);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    /* skip kgId and the main gene symbol (curGeneName) */
    if (sameWord(id, row[0]) || sameWord(row[0], curGeneName))
        continue;
    if (aliasReturn == NULL)
        {
        aliasReturn = dyStringNew(0);
        dyStringPrintf(aliasReturn, "Alternate Gene Symbols: ");
        }
    else
        dyStringPrintf(aliasReturn, ", ");
    dyStringPrintf(aliasReturn, "%s", row[0]);
    }
sqlFreeResult(&sr);
if (aliasReturn == NULL)
    return NULL;
dyStringPrintf(aliasReturn, "<BR>");
return dyStringCannibalize(&aliasReturn);
}

static void printAlias(char *id, struct sqlConnection *conn)
/* Print the alternate gene symbols line for id, if it has any aliases. */
{
char *aliases = aliasString(id, conn);
if (aliases)
    {
    hPrintf("%s", aliases);
    freeMem(aliases);
    }
}

static void printGeneSymbol(char *geneId, char *table, char *idCol,
    struct sqlConnection *conn)
/* Print out official Entrez gene symbol from a cross-reference table.
 * Prints nothing when the table is missing, no row matches geneId, or the
 * symbol is empty. */
{
char query[256];
struct sqlResult *sr;
char **row;

if (!sqlTablesExist(conn, table))
    return;
safef(query, sizeof(query), "select geneSymbol from %s where %s = '%s'",
    table, idCol, geneId);
sr = sqlGetResult(conn, query);
if (sr != NULL)
    {
    /* A query that matches nothing yields a NULL row; do not index it. */
    row = sqlNextRow(sr);
    if (row != NULL && isNotEmpty(row[0]))
        hPrintf("Entrez Gene Official Symbol: %s<BR>", row[0]);
    }
sqlFreeResult(&sr);
}

static char *getRefSeqAcc(char *id, char *table, char *idCol,
    struct sqlConnection *conn)
/* Finds RefSeq accession from a cross-reference table.  Returns a cloned
 * string, or NULL when the table is missing or no row matches id. */
{
char query[256];
struct sqlResult *sr = NULL;
char **row;
char *refSeqAcc = NULL;

if (sqlTablesExist(conn, table))
    {
    safef(query, sizeof(query), "select refSeq from %s where %s = '%s'",
        table, idCol, id);
    sr = sqlGetResult(conn, query);
    if (sr != NULL && (row = sqlNextRow(sr)) != NULL)
        refSeqAcc = cloneString(row[0]);
    }
sqlFreeResult(&sr);
return refSeqAcc;
}

static void printCcds(char *kgId, struct sqlConnection *conn)
/* Print out CCDS ids most closely matching the kg. */
{
struct ccdsGeneMap *ccdsKgs = NULL;
struct ccdsGeneMap *ccdsKg;

if (sqlTablesExist(conn, "ccdsKgMap"))
    ccdsKgs = ccdsGeneMapSelectByGene(conn, "ccdsKgMap", kgId, 0.0);
if (ccdsKgs == NULL)
    return;

hPrintf("CCDS: ");
/* since kg is not by location (even though we have a
 * curGeneStart/curGeneEnd), we need to use the location in the
 * ccdsGeneMap */
for (ccdsKg = ccdsKgs; ccdsKg != NULL; ccdsKg = ccdsKg->next)
    {
    if (ccdsKg != ccdsKgs)
        hPrintf(", ");
    /* NOTE(review): the format string had a single %s for these eight
     * arguments, so it printed the cart string rather than the CCDS id.  The
     * link is rebuilt on the pattern of printOurRefseqUrl; the track name
     * g=ccdsGene is an assumption - confirm. */
    hPrintf("<A HREF=\"../cgi-bin/hgc?%s&g=ccdsGene&i=%s&c=%s&o=%d&l=%d&r=%d&db=%s\">%s</A>",
        cartSidUrlString(cart), ccdsKg->ccdsId, ccdsKg->chrom,
        ccdsKg->chromStart, ccdsKg->chromStart, ccdsKg->chromEnd, database,
        ccdsKg->ccdsId);
    }
hPrintf("<BR>\n");
}

static char *addCommaSpace(char *inStr)
/* Return a copy of inStr with every '|' replaced by ", ".  The copy grows as
 * needed, so input of any length is safe.  Release the result with freeMem. */
{
struct dyString *out = dyStringNew(0);
char *segment = inStr;
char *bar;

while ((bar = strchr(segment, '|')) != NULL)
    {
    dyStringPrintf(out, "%.*s, ", (int)(bar - segment), segment);
    segment = bar + 1;
    }
dyStringPrintf(out, "%s", segment);
return dyStringCannibalize(&out);
}

static void printSpDisplayIds(char *acc, char *spDisplayId)
/* Print " (aka DISPLAYID)" or " (aka DISPLAYID or OLDDISPLAYID)" after a
 * protein accession.  Nothing is printed when the display ID merely contains
 * the accession (e.g. Q03399 | Q03399_HUMAN).  The old display ID is shown
 * only when it differs from both the new display ID and the accession. */
{
char *oldDisplayId;

if (strstr(spDisplayId, acc) != NULL)
    return;
hPrintf(" (aka %s", spDisplayId);
oldDisplayId = oldSpDisplayId(spDisplayId);
if (oldDisplayId != NULL && !sameWord(spDisplayId, oldDisplayId)
    && !sameWord(acc, oldDisplayId))
    hPrintf(" or %s", oldDisplayId);
hPrintf(")<BR>\n");
}

static void rgdGene2SynonymPrint(struct section *section,
    struct sqlConnection *conn, char *rgdGeneId)
/* Print the synonym section for an RGD gene: old symbols, old names, RefSeq
 * accession and UniProt protein.  Does nothing when rgdGeneId is NULL.
 * The section parameter is not used. */
{
char query[256], **row;
struct sqlResult *sr;
char *rgdNumber;

if (rgdGeneId == NULL)
    return;

/* rgdGene2Raw is keyed by the ID with its first four characters removed
 * (presumably an "RGD:" prefix - TODO confirm).  Only skip them when the ID
 * is long enough, so we never read past the end of the string. */
rgdNumber = rgdGeneId;
if (strlen(rgdGeneId) >= 4)
    rgdNumber += 4;
safef(query, sizeof(query),
    "select old_symbol, old_name from rgdGene2Raw where gene_rgd_id = '%s'",
    rgdNumber);
sr = sqlGetResult(conn, query);
if ((row = sqlNextRow(sr)) != NULL)
    {
    if (isNotEmpty(row[0]) && !sameString(row[0], "n/a"))
        {
        char *symbols = addCommaSpace(row[0]);
        hPrintf("Symbol: %s <BR>\n", symbols);
        freeMem(symbols);
        }
    /* The name column is tested against "n/a" itself; the earlier code
     * re-tested the symbol column here. */
    if (isNotEmpty(row[1]) && !sameString(row[1], "n/a"))
        {
        char *names = addCommaSpace(row[1]);
        hPrintf("Name: %s <BR>\n", names);
        freeMem(names);
        }
    }
sqlFreeResult(&sr);

safef(query, sizeof(query),
    "select value from rgdGene2ToRefSeq where name= '%s'", rgdGeneId);
sr = sqlGetResult(conn, query);
if ((row = sqlNextRow(sr)) != NULL)
    printRefSeqAccession(row[0]);
sqlFreeResult(&sr);

safef(query, sizeof(query),
    "select value from rgdGene2ToUniProt where name= '%s'", rgdGeneId);
sr = sqlGetResult(conn, query);
if ((row = sqlNextRow(sr)) != NULL)
    {
    char *spId = row[0];
    char *spDisplayId;

    hPrintf("Protein: ");
    /* NOTE(review): this call passed spId twice, which suggests a UniProt
     * link was here; the URL is not recoverable from this file. */
    hPrintf("%s\n", spId);

    /* show SWISS-PROT display ID if it is different than the accession ID */
    spDisplayId = spAnyAccToId(spConn, spId);
    if (spDisplayId == NULL)
        errAbort("<BR>The corresponding protein %s of this gene is not found in our current UniProtKB DB.", spId);
    printSpDisplayIds(spId, spDisplayId);
    }
sqlFreeResult(&sr);
}

static void synonymPrint(struct section *section, struct sqlConnection *conn,
    char *id)
/* Print the synonym (Other Names) section for gene id: alternate symbols, the
 * database-specific gene ID, a RefSeq or representative mRNA accession, the
 * protein accession with its display IDs, and matching CCDS ids.  RGD genes
 * are handed to rgdGene2SynonymPrint instead. */
{
char *protAcc = getSwissProtAcc(conn, spConn, id);
char *refSeqAcc = "";
char *mrnaAcc = "";
char condStr[255];

if (isRgdGene(conn))
    {
    rgdGene2SynonymPrint(section, conn, id);
    return;
    }
if (sqlTablesExist(conn, "kgAlias"))
    printAlias(id, conn);
if (sameWord(genome, "Zebrafish"))
    {
    char *xrefTable = "ensXRefZfish";
    char *geneIdCol = "ensGeneId";
    /* get Gene Symbol and RefSeq accession from Zebrafish-specific */
    /* cross-reference table */
    printGeneSymbol(id, xrefTable, geneIdCol, conn);
    /* getRefSeqAcc returns NULL when nothing is found; keep refSeqAcc
     * non-NULL because it is indexed below. */
    refSeqAcc = emptyForNull(getRefSeqAcc(id, xrefTable, geneIdCol, conn));
    hPrintf("ENSEMBL ID: %s", id);
    }
else
    {
    char query[256];
    char *toRefTable = genomeOptionalSetting("knownToRef");
    if (toRefTable != NULL && sqlTableExists(conn, toRefTable))
        {
        safef(query, sizeof(query), "select value from %s where name='%s'",
            toRefTable, id);
        refSeqAcc = emptyForNull(sqlQuickString(conn, query));
        }
    if (sqlTableExists(conn, "kgXref"))
        {
        safef(query, sizeof(query), "select mRNA from kgXref where kgID='%s'", id);
        mrnaAcc = emptyForNull(sqlQuickString(conn, query));
        }
    if (sameWord(genome, "C. elegans"))
        hPrintf("WormBase ID: %s<BR>", id);
    else
        hPrintf("UCSC ID: %s<BR>", id);
    }

/* Prefer the RefSeq accession; fall back to the representative mRNA. */
if (refSeqAcc[0] != 0)
    printRefSeqAccession(refSeqAcc);
else if (mrnaAcc[0] != 0)
    {
    safef(condStr, sizeof(condStr), "acc = '%s'", mrnaAcc);
    if (sqlGetField(database, "gbCdnaInfo", "acc", condStr) != NULL)
        {
        /* NOTE(review): assumes hPrintf output goes to stdout - confirm. */
        hPrintf("Representative RNA: <A HREF=\"");
        printOurMrnaUrl(stdout, mrnaAcc);
        hPrintf("\">%s</A><BR>\n", mrnaAcc);
        }
    else
        {
        /* do not show URL link if it is not found in gbCdnaInfo */
        hPrintf("Representative RNA: %s ", mrnaAcc);
        }
    }

if (protAcc != NULL)
    {
    char *kgProteinID = NULL;
    char *spDisplayId;

    if (hTableExists(sqlGetDatabase(conn), "knownGene")
        && isNotEmpty(curGeneChrom) && differentWord(curGeneChrom, "none"))
        {
        safef(condStr, sizeof(condStr),
            "name = '%s' and chrom = '%s' and txStart=%d and txEnd=%d",
            id, curGeneChrom, curGeneStart, curGeneEnd);
        kgProteinID = sqlGetField(database, "knownGene", "proteinID", condStr);
        }

    hPrintf("Protein: ");
    /* The lookup above may find nothing, so check for NULL before searching
     * the ID for the '-' that marks a splice variant. */
    if (kgProteinID != NULL && strchr(kgProteinID, '-') != NULL)
        {
        /* parent accession of a variant splice protein: text before the '-' */
        char *parAcc = cloneString(kgProteinID);
        *strchr(parAcc, '-') = '\0';
        /* show variant splice protein and its parent here */
        hPrintf("%s, splice isoform of ", kgProteinID);
        hPrintf("%s\n", parAcc);
        freeMem(parAcc);
        }
    else
        {
        hPrintf("%s\n", protAcc);
        }

    /* show SWISS-PROT display ID if it is different than the accession ID */
    spDisplayId = spAnyAccToId(spConn, protAcc);
    if (spDisplayId == NULL)
        errAbort("<BR>%s seems to no longer be a valid protein ID in our latest UniProtKB DB.", protAcc);
    printSpDisplayIds(protAcc, spDisplayId);
    }
printCcds(id, conn);
}

struct section *synonymSection(struct sqlConnection *conn,
    struct hash *sectionRa)
/* Create synonym (aka Other Names) section. */
{
struct section *section = sectionNew(sectionRa, "synonym");
section->print = synonymPrint;
return section;
}