/* pfam - handle pfam columns. This requires a join. */
#include "common.h"
#include "hash.h"
#include "linefile.h"
#include "localmem.h"
#include "dystring.h"
#include "obscure.h"
#include "jksql.h"
#include "hgNear.h"
static boolean isPfamId(char *name)
/* Return TRUE if name is exactly "PF" followed by five decimal digits
 * (for example PF00067), and FALSE for anything else.  name must be a
 * NUL-terminated string; scanning stops at the first character that
 * does not fit, so short strings are never read past their terminator. */
{
int i;
boolean ok = (name[0] == 'P' && name[1] == 'F');
for (i = 2; ok && i < 7; ++i)
    {
    /* The cast matters: handing isdigit() a negative char value is
     * undefined behavior on platforms where plain char is signed. */
    ok = (isdigit((unsigned char)name[i]) != 0);
    }
return ok && name[7] == 0;
}
static void pfamFilterControls(struct column *col,
struct sqlConnection *conn)
/* Print out controls for advanced filter. */
{
hPrintf(
"Terms can include Pfam descriptions such as 'Cytochrome P450'
"
"or Pfam IDs such as PF00067. Please enclose term in single quotes
"
"if it contains multiple words. You may use * and ? wildcards.
\n");
hPrintf("Term(s): ");
advFilterRemakeTextVar(col, "terms", 35);
hPrintf(" Include if ");
advFilterAnyAllMenu(col, "logic", FALSE);
hPrintf("terms match");
}
static struct genePos *pfamAdvFilter(struct column *col,
    struct sqlConnection *defaultConn, struct genePos *list)
/* Do advanced filter for pfam and return the filtered gene list.
 * If the "terms" filter value is not set, list is returned untouched.
 * Otherwise every term is turned into a list of Pfam accessions: a term
 * that already looks like a Pfam ID is used as is, anything else is
 * matched (with wildcards) against pfamDesc.description on a connection
 * to col->protDb.  Genes in col->table (queried on defaultConn) whose
 * value is one of those accessions match the term.  With "or" logic a
 * gene passes if it matches any term; otherwise it must match all of
 * them.  The surviving genes are selected with weedUnlessInHash(). */
{
char *terms = advFilterVal(col, "terms");
if (terms != NULL)
    {
    struct sqlConnection *conn = sqlConnect(col->protDb);
    struct sqlResult *sr;
    struct dyString *dy = newDyString(1024);   /* Reused for every query. */
    char **row;
    boolean orLogic = advFilterOrLogic(col, "logic", TRUE);
    struct slName *term, *termList = stringToSlNames(terms);
    struct hash *passHash = newHash(17);   /* Genes passing so far. */
    struct hash *prevHash = NULL;   /* AND logic: survivors of earlier terms. */
    struct genePos *gp;

    /* Build up hash of all genes so query results can be mapped back
     * to entries of the incoming list. */
    struct hash *geneHash = newHash(18);
    for (gp = list; gp != NULL; gp = gp->next)
        hashAdd(geneHash, gp->name, gp);

    for (term = termList; term != NULL; term = term->next)
        {
        /* Build up a list of IDs of descriptions that match term. */
        struct slName *idList = NULL, *id;
        if (isPfamId(term->name))
            {
            idList = slNameNew(term->name);
            }
        else
            {
            /* NOTE(review): the term is presumably user-entered and is
             * spliced into the SQL text; confirm that sqlLikeFromWild()
             * neutralizes quote characters or escape it before use.
             * NOTE(review): sqlWild is not released here; check whether
             * sqlLikeFromWild() hands back memory the caller owns. */
            char *sqlWild = sqlLikeFromWild(term->name);
            /* Built in the growable string so a long term is not
             * constrained by a fixed-size stack buffer. */
            dyStringClear(dy);
            dyStringPrintf(dy,
                "select pfamAC from pfamDesc where description like '%s'",
                sqlWild);
            sr = sqlGetResult(conn, dy->string);
            while ((row = sqlNextRow(sr)) != NULL)
                {
                id = slNameNew(row[0]);
                slAddHead(&idList, id);
                }
            sqlFreeResult(&sr);
            }

        if (idList != NULL)
            {
            /* Build up query that includes all IDs. */
            dyStringClear(dy);
            dyStringPrintf(dy, "select name from %s where ", col->table);
            dyStringPrintf(dy, "value='%s'", idList->name);
            for (id = idList->next; id != NULL; id = id->next)
                dyStringPrintf(dy, " or value='%s'", id->name);

            /* Execute query and put matchers into hash.  Under AND
             * logic a gene is only kept if it also survived every
             * earlier term (i.e. it is present in prevHash). */
            sr = sqlGetResult(defaultConn, dy->string);
            while ((row = sqlNextRow(sr)) != NULL)
                {
                gp = hashFindVal(geneHash, row[0]);
                if (gp != NULL)
                    {
                    char *name = gp->name;
                    if (prevHash == NULL || hashLookup(prevHash, name) != NULL)
                        hashStore(passHash, name);
                    }
                }
            sqlFreeResult(&sr);
            slFreeList(&idList);
            }

        if (!orLogic)
            {
            /* AND logic: what passed this term becomes the gate for the
             * next one.  After the last term passHash is left in place
             * as the final result. */
            hashFree(&prevHash);
            if (term->next != NULL)
                {
                prevHash = passHash;
                passHash = newHash(17);
                }
            }
        }

    list = weedUnlessInHash(list, passHash);

    /* Release everything allocated above, including the gene lookup
     * hash and the parsed term list. */
    hashFree(&prevHash);
    hashFree(&passHash);
    hashFree(&geneHash);
    slFreeList(&termList);
    dyStringFree(&dy);
    sqlDisconnect(&conn);
    }
return list;
}
void setupColumnPfam(struct column *col, char *parameters)
/* Set up a Pfam column: run the generic association setup, record a
 * copy of parameters as the column's table, require a protDb setting,
 * and install the Pfam-specific advanced filter callbacks. */
{
setupColumnAssociation(col, parameters);
col->table = cloneString(parameters);

/* protDb is required; abort with a message naming the column if the
 * setting is absent. */
col->protDb = columnSetting(col, "protDb", NULL);
if (col->protDb == NULL)
    {
    errAbort("Missing required protDb field in column %s", col->name);
    }

col->filterControls = pfamFilterControls;
col->advFilter = pfamAdvFilter;
}