/* spliceProbeVis.c - Visualize some plots for data. Since R is slow
to call build up entire script to make all plots and then call R
once, have to build up html in a buffer too or else browser starts
looking for files that aren't there yet.
*/
#include "common.h"
#include "hdb.h"
#include "cheapcgi.h"
#include "htmshell.h"
#include "portable.h"
#include "dMatrix.h"
#include "dystring.h"
#include "mouseAPSetEventMap.h"
/* Don't start printing web page until R creates plots. */
struct dyString *html = NULL;
/* The event that this splicing event is represented by. */
struct mouseAPSetEventMap *altEvent = NULL;
char *abbv = "abbv = c( 'ce1','ce2','ce3','ch1','ch2','ch3','co1','co2','co3','ct1','ct2','ct3',\n"
"'es1','es2','es3','he1','he2','he3','hb1','hb2','hb3','ki1','ki2','ki3',\n"
"'li1','li2','li3','lu1','lu2','lu3','mg1','mg2','mg3','ob1','ob2','ob3',\n"
"'ov1','ov2','ov3','pi1','pi2','pi3','sg1','sg2','sg3','sm1','sm2','sm3','sm4',\n"
"'si1','si2','si3','sc1','sc2','sc3','sp1','sp2','sp3','st1','st2','st3',\n"
"'te1','te2','te3','th1','th2','th3');\n";
char *key = "Tisues: cerebellum (ce), cerebral hemisphere (ch), colon (co), cortex (ct), esophagus (es), heart (he), hind brain (hb), kidney (ki), liver (li), lung (lu), mammary gland (mg), olfactory bulb (ob), ovary (ov), pineal gland (pg), salivary gland (sg), skeletal muscle (sm), small intestine (si), spinal cord (sc), spleen (sp), stomach (st), testicle (te), thalamus (th)";
/* char *brainTissues = "brainTissues = c(1, 2, 3, 4, 5, 6, 10, 11, 12, 19, 20, 21, 34, 35, 36, 40, 41, 42, 65, 66, 67);\n"; */
/* char *nonBrainTissues = "nonBrainTissues = c(7, 8, 9, 13, 14, 15, 16, 17, 18, 22, 23, 24, 25, 26, 27, \n" */
/* "28, 29, 30, 31, 32, 33, 37, 38, 39, 43, 44, 45, 46, 47, 48, \n" */
/* "49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);\n"; */
char *isBrain = "isBrain = c(1,1,1,1,1,1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1);\n";
char *isMuscle = "isBrain =c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0);\n";
char *rHeader = "library(MASS);\n"
"rlmZero <- function(x,y){\n"
"# Compute the rlm of all points\n"
"# constrained to go through the origin.\n"
"fit <- rlm(y ~ x + 0)\n"
"coef <- c(0,0);\n"
"coef[1]<-0\n"
"coef[2]<-fit$coef[1]\n"
"res<-y-coef[2]*x-coef[1]\n"
"list(coef=coef,residuals=res)\n"
"} \n";
char *plotRegression =
"rX = range(incDat);\n"
"rY = range(skipDat);\n"
"incBrain = incDat[isBrain == 1 & expressed];\n"
"skipBrain = skipDat[isBrain == 1 & expressed];\n"
"incNotBrain = incDat[isBrain == 0 & expressed];\n"
"skipNotBrain = skipDat[isBrain == 0 & expressed];\n"
"plot.default(-1, -1, xlim=rX, ylim=rY, pch=21, main='%s %s - Red, Not %s - Blue', \n"
" col = 'white', xlab='Include intensity', ylab='Skip intensity');\n"
"if(length(abbv[isBrain == 1 & expressed == T]) != 0) {\n"
"text(incBrain, skipBrain, abbv[isBrain == 1 & expressed], col='#A52A2A')\n"
"}\n"
"if(length(abbv[isBrain == 1 & expressed == F]) != 0) {\n"
"text(incDat[isBrain == 1 & expressed == F], skipDat[isBrain == 1 & expressed == F], abbv[isBrain == 1 & expressed == F], col='#F38291')\n"
"}\n"
"if(length(abbv[isBrain == 0 & expressed == T]) != 0) {\n"
"text(incNotBrain, skipNotBrain, abbv[isBrain == 0 & expressed], col='blue');\n"
"}\n"
"if(length(abbv[isBrain == 0 & expressed == F]) != 0) {\n"
"text(incDat[isBrain == 0 & expressed == F], skipDat[isBrain == 0 & expressed == F], abbv[isBrain == 0 & expressed == F], col='#88D6FB')\n"
"}\n"
"if(length(incBrain) != 0 && length(incNotBrain) != 0) {\n"
"abline(rlmZero(incBrain,skipBrain)$coef, col='#A52A2A');\n"
"abline(rlmZero(incNotBrain,skipNotBrain)$coef, col='blue');\n"
"}\n"
;
void makePlotLink(char *type, char *fileName)
/* Write out an image or pdf link depending on pdf or image. */
{
if(cgiBoolean("pdf"))
dyStringPrintf(html, "%s
\n",
fileName, type);
else
dyStringPrintf(html, "
\n", fileName);
}
void initPlotOutput(struct dyString *script, char *fileName)
/* Init eithe a pdf or bitmap device. */
{
if(cgiBoolean("pdf"))
dyStringPrintf(script, "pdf(file='%s', width=11, height=8);\n", fileName);
else
dyStringPrintf(script, "bitmap('%s', width=4.5, height=3, res=200);\n", fileName);
}
void initRegPlotOutput(struct dyString *script, char *fileName)
/* Init eithe a pdf or bitmap device. */
{
if(cgiBoolean("pdf"))
dyStringPrintf(script, "pdf(file='%s', width=6, height=6);\n", fileName);
else
dyStringPrintf(script, "bitmap('%s', width=3, height=3, res=200);\n", fileName);
}
void constructQueryForEvent(struct dyString *query, char *skipPSet, char *tableName)
/* Construct a query for all the probe sets in a particular event for
a matrix table. */
{
int i = 0;
char eventQuery[256];
struct mouseAPSetEventMap *event = NULL;
struct sqlConnection *conn = hAllocConn();
safef(eventQuery, sizeof(eventQuery),
"select * from mouseAPSetEventMap where skipPSet = '%s';",
skipPSet);
/* Check our little cache. */
if(altEvent == NULL)
altEvent = event = mouseAPSetEventMapLoadByQuery(conn, eventQuery);
else
event = altEvent;
if(event == NULL)
errAbort("Couldn't find an alternative events for: %s", skipPSet);
assert(query);
dyStringClear(query);
dyStringPrintf(query, "select * from %s where ", tableName);
for(i = 0; i < event->incCount; i++)
dyStringPrintf(query, "name like '%s' or ", event->incPSets[i]);
for(i = 0; i < event->geneCount; i++)
dyStringPrintf(query, "name like '%s' or ", event->genePSets[i]);
dyStringPrintf(query, "name like '%s' order by name;", skipPSet);
hFreeConn(&conn);
}
void closePlotOutput(struct dyString *script)
/* Turn off device. */
{
dyStringPrintf(script, "dev.off();\n");
}
struct dMatrix *dataFromTable(char *tableName, char *query)
/* Read data from table name with rowname. */
{
struct sqlConnection *conn = hAllocConn();
struct slName *colNames = sqlListFields(conn, tableName);
struct slName *name = NULL;
int count = 0;
struct dMatrix *dM = NULL;
struct sqlResult *sr = NULL;
char **row = NULL;
int colIx = 0;
/* Allocate some initial memory. */
AllocVar(dM);
dM->colCount = slCount(colNames) -1;
dM->rowCount = 1;
AllocArray(dM->matrix, dM->rowCount);
AllocArray(dM->rowNames, dM->rowCount);
AllocArray(dM->matrix[0], dM->colCount);
AllocArray(dM->colNames, dM->colCount);
for(name = colNames->next; name != NULL; name = name->next)
dM->colNames[count++] = cloneString(name->name);
/* Execute our query. */
sr = sqlGetResult(conn, query);
count = 0;
/* Read out the data. */
while((row = sqlNextRow(sr)) != NULL)
{
/* Expand matrix data as necessary. */
if(count > 0)
{
ExpandArray(dM->matrix, dM->rowCount, dM->rowCount+1);
AllocArray(dM->matrix[dM->rowCount], dM->colCount);
ExpandArray(dM->rowNames, dM->rowCount, dM->rowCount+1);
dM->rowCount++;
}
dM->rowNames[count] = cloneString(row[0]);
for(colIx = 0; colIx < dM->colCount; colIx++)
dM->matrix[count][colIx] = atof(row[colIx+1]);
count++;
}
if(count == 0)
errAbort("Didn't find any results for query:\n%s", query);
sqlFreeResult(&sr);
hFreeConn(&conn);
return dM;
}
void plotMatrixRows(struct dMatrix *mat, struct dyString *script,
char *title, char *fileName, char *type, boolean linesOnly)
/* Write a file which will produce an R script to
make a plot. */
{
int i = 0, j = 0;
double minVal = BIGNUM, maxVal = 0;
initPlotOutput(script, fileName);
/* Write out all the data. */
dyStringPrintf(script, "\n dat = c(");
for(i = 0; i < mat->rowCount; i++)
{
for(j = 0; j < mat->colCount; j++)
{
dyStringPrintf(script, "%.2f,", mat->matrix[i][j]);
maxVal = max(mat->matrix[i][j], maxVal);
minVal = min(mat->matrix[i][j], minVal);
}
dyStringPrintf(script, "\n");
}
dyStringPrintf(script, ");\n");
/* Write the row names. */
dyStringPrintf(script, "rownames = c(");
for(i = 0; i < mat->rowCount; i++)
dyStringPrintf(script, "'%s',", mat->rowNames[i]);
dyStringPrintf(script, ");\n");
/* Write the column names. */
dyStringPrintf(script, "colnames = c(");
for(i = 0; i < mat->colCount; i++)
dyStringPrintf(script, "'%s',", mat->colNames[i]);
dyStringPrintf(script, ");\n");
/* Create the matrix. */
dyStringPrintf(script, "mat = matrix(data=dat, nrow=%d, ncol=%d, byrow=T, dimnames=list(rownames, colnames));\n",
mat->rowCount, mat->colCount);
dyStringPrintf(script, "colorIx = 0;\n");
dyStringPrintf(script, "par(mar=c(10, 4, 4, 2) + 0.1);\n");
/* Create initial plot. */
dyStringPrintf(script,
"cols = rainbow(length(unique(rownames)));\n"
"plot.default( 0, 0, ylim=c(%.2f, %.2f), xlim=c(1,%d), "
"main='%s',"
"col='white',"
"axes=F, ylab='%s', xlab=''"
");\n",
minVal, maxVal,
mat->colCount,
title,
type);
dyStringPrintf(script, "lastRowName = 'first';\n");
/* Loop through and plot individual rows. */
dyStringPrintf(script, "for(row in 1:%d) {\n", mat->rowCount);
dyStringPrintf(script, "if(lastRowName != rownames[row]) { colorIx = colorIx+1; lastRowName = rownames[row]; }\n");
dyStringPrintf(script, "lines(c(1:%d), mat[row,], col=cols[colorIx], pch=20, type='%c');\n",
mat->colCount, linesOnly ? 'l' : 'b');
dyStringPrintf(script, "}\n");
/* Do the axis and the legend. */
dyStringPrintf(script, "axis(2);\n");
dyStringPrintf(script, "axis(1, at=c(1:%d), labels=colnames, las=3);\n", mat->colCount);
dyStringPrintf(script, "legend(1, .95 * %.2f, unique(rownames), fill=cols);\n", maxVal);
closePlotOutput(script);
}
void touchBlank(char *fileName)
/* Make an empty file. */
{
FILE *out = NULL;
assert(fileName);
out = mustOpen(fileName, "w");
carefulClose(&out);
}
void doRegressionPlot(struct dyString *script, char *skipPset,
char *incTable, char *skipTable, char *geneTable)
/* Put up a regression plot. */
{
struct dMatrix *skip = NULL, *inc = NULL, *gene = NULL;
char query[256];
int i = 0;
struct tempName regPlot;
double thresh = .75;
if(cgiBoolean("allPoints"))
thresh = -1;
if(cgiBoolean("pdf"))
makeTempName(®Plot, "sp", ".pdf");
else
makeTempName(®Plot, "sp", ".png");
touchBlank(regPlot.forCgi);
safef(query, sizeof(query), "select * from %s where name like '%%%s%%';",
incTable, skipPset);
inc = dataFromTable(incTable, query);
safef(query, sizeof(query), "select * from %s where name like '%%%s%%';",
skipTable, skipPset);
skip = dataFromTable(skipTable, query);
safef(query, sizeof(query), "select * from %s where name like '%%%s%%';",
geneTable, skipPset);
gene = dataFromTable(geneTable, query);
initRegPlotOutput(script, regPlot.forCgi);
dyStringPrintf(script, "incDat = c(");
for(i = 0; i< inc->colCount; i++)
dyStringPrintf(script, "%.4f,", inc->matrix[0][i]);
dyStringPrintf(script, ");\n");
dyStringPrintf(script, "skipDat = c(");
for(i = 0; i< skip->colCount; i++)
dyStringPrintf(script, "%.4f,", skip->matrix[0][i]);
dyStringPrintf(script, ");\n");
dyStringPrintf(script, "geneDat = c(");
for(i = 0; i< gene->colCount; i++)
dyStringPrintf(script, "%.4f,", gene->matrix[0][i]);
dyStringPrintf(script, ");\n");
dyStringPrintf(script, "expressed = geneDat > %.4f;\n", thresh);
dyStringPrintf(script, abbv);
if(cgiBoolean("muscle"))
{
dyStringPrintf(script, isMuscle);
dyStringPrintf(script, plotRegression, altEvent->geneName, "Muscle", "Muscle");
}
else
{
dyStringPrintf(script, isBrain);
dyStringPrintf(script, plotRegression, altEvent->geneName, "Brain", "Brain");
}
closePlotOutput(script);
makePlotLink("Include vs. Skip", regPlot.forCgi);
dyStringPrintf(html, "
\n");
dyStringPrintf(html, "Tissues for which the gene expression was too low to be considered are in lighter colors. \n"); dyStringPrintf(html, "%s\n", key); dyStringPrintf(html, " |