/* domains - do protein domains section. */
/* This module is adapted from the domains module of hgGene. */
#include "common.h"
#include "hash.h"
#include "linefile.h"
#include "dystring.h"
#include "spDb.h"
#include "hdb.h"
#include "pbTracks.h"
char *samGenomeDb(char *proteinId)
/* Determine whether a protein belongs to a genome DB that has SAM results.
 * Looks up the taxon of proteinId in the accToTaxon table of UNIPROT_DB_NAME.
 * Returns a strdup'ed genome database name ("sacCer1" for taxon "4932"),
 * or NULL when the accession has no taxon row or the taxon is any other value.
 * This function will be updated as SAM applies to more genomes. */
{
char where[128];
char *taxonId;
char *genomeDb = NULL;

safef(where, sizeof(where), "acc='%s'", proteinId);
taxonId = sqlGetField(UNIPROT_DB_NAME, "accToTaxon", "taxon", where);
/* NOTE(review): taxonId is not released here; if sqlGetField hands back
 * heap memory this is a small leak -- confirm ownership. */
if (taxonId != NULL && sameWord(taxonId, "4932"))
    genomeDb = strdup("sacCer1");
return genomeDb;
}
void modBaseAnchor(char *swissProtAcc)
/* Print out anchor to modBase. */
/* NOTE(review): the format string below is empty, so swissProtAcc is passed
 * but never formatted (assuming hPrintf is printf-like).  The anchor markup
 * appears to be missing from the literal -- confirm against the intended
 * ModBase link before relying on this function. */
{
hPrintf("", swissProtAcc);
}
void domainsPrint(struct sqlConnection *spConn, char *swissProtAcc)
/* Print out protein domains.
 * Emits, via hPrintf and in this order: InterPro domains, Pfam domains
 * (plus SCOP domains when kgVersion == KG_III), PDB structures, the SAM-T02
 * sub-section when samGenomeDb() returns a database for the protein, and
 * finally the ModBase section.
 *   spConn       - connection used for the extDbRef/extDb queries and for
 *                  spExtDbAcc1List().
 *   swissProtAcc - accession of the protein being displayed.
 * NOTE(review): several hPrintf literals below are empty or span source
 * lines; markup seems to have been lost from them -- confirm against the
 * intended output before editing any of these strings. */
{
struct slName *el, *list;
char *samDb;
char condStr[128];
char *parentId;
char *kgId = NULL;
/* Use parent protein ID for domain links */
/* There may be cases that a specific variant may have some domain spliced out */
/* But, it is better to cover most of them, than none at all */
safef(condStr, sizeof(condStr), "variant='%s'", swissProtAcc);
parentId = sqlGetField(PROTEOME_DB_NAME, "spVariant", "parent", condStr);
/* NOTE(review): parentId is used below without a NULL check and is never
 * released in this function -- confirm spVariant always has a row for
 * swissProtAcc and who owns the returned string. */
/* InterPro section: list only serves as a "has any entries" test; the rows
 * that get printed come from the explicit query below. */
list = spExtDbAcc1List(spConn, parentId, "Interpro");
if (list != NULL)
{
char query[256], **row;
struct sqlResult *sr;
hPrintf("InterPro Domains: ");
hPrintf("",
swissProtAcc);
hPrintf("Graphical view of domain structure
\n
");fflush(stdout);
safef(query, sizeof(query),
"select extAcc1,extAcc2 from extDbRef,extDb"
" where extDbRef.acc = '%s'"
" and extDb.val = 'Interpro' and extDb.id = extDbRef.extDb"
, parentId);
/* NOTE(review): sr is never released in this block -- presumably a
 * sqlFreeResult(&sr) is wanted after the loop; confirm. */
sr = sqlGetResult(spConn, query);
while ((row = sqlNextRow(sr)) != NULL)
{
hPrintf("- ", row[0]);
hPrintf("%s - %s
\n", row[0], row[1]);
}
hPrintf("
\n");
slFreeList(&list);
}
/* For KG_III, Pfam and SCOP domains are looked up in the genome database
 * through the kgId that kgXref maps from swissProtAcc; otherwise only Pfam
 * is shown, taken from spConn using the parent protein ID. */
if (kgVersion == KG_III)
{
struct sqlConnection *hgConn; /* Connection to genome database. */
/* NOTE(review): hgConn is not disconnected and kgId is not released
 * anywhere in this function -- confirm whether cleanup is required. */
hgConn = sqlConnect(database);
safef(condStr, sizeof(condStr), "spId='%s'", swissProtAcc);
kgId = sqlGetField(database, "kgXref", "kgId", condStr);
/* Do Pfam domains here. */
list = NULL;
if (kgId != NULL) list = getPfamDomainList(hgConn, kgId);
if (list != NULL)
{
hPrintf("Pfam Domains:
");
for (el = list; el != NULL; el = el->next)
{
char query[256];
char *description;
safef(query, sizeof(query),
"select description from %s.pfamDesc where pfamAC='%s'", database, el->name);
description = sqlQuickString(hgConn, query);
/* Fall back to a placeholder so the freez() below always has an
 * allocated string to release. */
if (description == NULL)
description = cloneString("n/a");
hPrintf("",
el->name);
hPrintf("%s - %s
\n", el->name, description);
freez(&description);
}
slFreeList(&list);
hPrintf("
\n");
}
/* Do SCOP domains here */
list = NULL;
if (kgId != NULL) list = getDomainList(hgConn, kgId, "Scop");
if (list != NULL)
{
hPrintf("SCOP Domains:
");
for (el = list; el != NULL; el = el->next)
{
char query[256];
char *description;
safef(query, sizeof(query),
"select description from %s.scopDesc where acc='%s'", database, el->name);
description = sqlQuickString(hgConn, query);
if (description == NULL)
description = cloneString("n/a");
hPrintf("",
el->name);
hPrintf("%s - %s
\n", el->name, description);
freez(&description);
}
slFreeList(&list);
hPrintf("
\n");
}
}
else
{
/* Non-KG_III path: Pfam accessions come from spConn for the parent
 * protein, descriptions from protDbName.pfamDesc. */
list = spExtDbAcc1List(spConn, parentId, "Pfam");
if (list != NULL)
{
hPrintf("Pfam Domains:\n");fflush(stdout);
for (el = list; el != NULL; el = el->next)
{
char query[256];
char *description;
safef(query, sizeof(query), "select description from %s.pfamDesc where pfamAC='%s'",
protDbName, el->name);
description = sqlQuickString(spConn, query);
if (description == NULL)
description = cloneString("n/a");
hPrintf("- ",
el->name);
hPrintf("%s - %s
\n", el->name, description);
freez(&description);
}
slFreeList(&list);
hPrintf("
\n");
}
}
/* do not use parent protein, since 3D structure is determined by specific protein sequence */
/* As in the InterPro section, list is only a presence test; the printed rows
 * come from the query below. */
list = spExtDbAcc1List(spConn, swissProtAcc, "PDB");
if (list != NULL)
{
char query[256], **row;
struct sqlResult *sr;
/* column counts entries in the current output row (wraps after maxColumn);
 * rowCount counts completed wraps and gates the images below. */
int column = 0, maxColumn=4, rowCount=0;
hPrintf("Protein Data Bank (PDB) 3-D Structure
");
safef(query, sizeof(query),
"select extAcc1,extAcc2 from extDbRef,extDb"
" where extDbRef.acc = '%s'"
" and extDb.val = 'PDB' and extDb.id = extDbRef.extDb"
, swissProtAcc);
/* NOTE(review): sr is never released in this block either -- confirm
 * whether sqlFreeResult(&sr) is needed after the loop. */
sr = sqlGetResult(spConn, query);
hPrintf("\n");
while ((row = sqlNextRow(sr)) != NULL)
{
if (++column > maxColumn)
{
hPrintf("
");
column = 1;
/* The bandwidth notice is printed once, on the first wrap only. */
if (rowCount == 0)
{
hPrintf("To conserve bandwidth, only the images from the first %d structures are shown.", maxColumn);
hPrintf(" |
");
}
++rowCount;
}
hPrintf("");
hPrintf("", row[0]);
/* Images are emitted only before the first wrap, i.e. for the first
 * maxColumn structures; later entries get the text line only. */
if (rowCount < 1)
hPrintf("![](\"http://www.rcsb.org/pdb/images/%s_asym_r_250.jpe\") ", row[0]);
hPrintf("%s - %s \n", row[0], row[1]);
hPrintf(" | ");
}
hPrintf("
\n");
hPrintf("
\n");
slFreeList(&list);
}
/* if this protein belongs to a genome having SAM-T02 results, show the sub-section */
/* NOTE(review): samDb is not released after use; samGenomeDb() returns a
 * strdup'ed string, so this looks like a small leak -- confirm. */
samDb = samGenomeDb(swissProtAcc);
if (samDb != NULL)
{
doSamT02(swissProtAcc, samDb);
}
/* do not use parent ID, again 3D structure is dependent on specific sequence */
/* Do modBase link. */
/* Always printed, whether or not ModBase has a model (see the explanatory
 * text emitted at the end of this block).  Three images are requested:
 * the default view plus one with axis=x and one with axis=y.
 * NOTE(review): the two axis URLs contain "°ree=90" where "&degree=90"
 * looks intended (an entity-decoding artifact?) -- confirm. */
{
hPrintf("ModBase Predicted Comparative 3D Structure on ");
modBaseAnchor(swissProtAcc);
hPrintf("%s", swissProtAcc);
hPrintf("
\n");
hPrintf("");
hPrintf("");
modBaseAnchor(swissProtAcc);
hPrintf("![](\"http://salilab.org/modbaseimages/image/modbase.jpg?database_id=%s\") | ", swissProtAcc);
hPrintf("");
modBaseAnchor(swissProtAcc);
hPrintf("![](\"http://salilab.org/modbaseimages/image/modbase.jpg?database_id=%s&axis=x°ree=90\") | ", swissProtAcc);
hPrintf("");
modBaseAnchor(swissProtAcc);
hPrintf("![](\"http://salilab.org/modbaseimages/image/modbase.jpg?database_id=%s&axis=y°ree=90\") | ", swissProtAcc);
hPrintf("
\n");
hPrintf("Front | ");
hPrintf("Top | ");
hPrintf("Side | ");
hPrintf("
\n");
hPrintf("The pictures above may be empty if there is no "
"ModBase structure for the protein. The ModBase structure "
"frequently covers just a fragment of the protein. You may "
"be asked to log onto ModBase the first time you click on the "
"pictures. It is simplest after logging in to just click on "
"the picture again to get to the specific info on that model.");
}
}