/* regionOrtho - merge orthology predictions from liftOver and Mercator into consensus BED regions */
#include "regionOrtho.h"
void usage()
/* Report the command-line synopsis through errAbort.  The synopsis lists the
 * four arguments main() reads: two orthology inputs, the consensus BED output
 * and the file receiving unmatched regions. */
{
errAbort("regionOrtho - merge orthology predictions from liftOver and Mercator.\n"
" generates BED files for the regions. \n"
" (inputs in BED 4 files or BED 4+ tables)\n"
"usage:\n\tregionOrtho { [toDb.]orthoTable1 | [orthoPath1/]orthoFile1 } \\\n"
" { [toDb.]orthoTable2 | [orthoPath2/]orthoFile2 } \\\n"
" consensusFile.bed order.err\n");
}
struct sizeList *getRegions(char *regionSource, boolean excludeRandoms)
{
struct sizeList *list = NULL, *sl;
if (fileExists(regionSource))
{
struct lineFile *IN = lineFileOpen(regionSource, TRUE);
char *row[4];
while (lineFileRow(IN, row))
{
if (startsWith(row[3], "MEN"))
continue;
if (sameString(row[0], "chrom"))
continue;
if (excludeRandoms && (startsWith(row[0], "chrUn") || endsWith(row[0], "random")))
continue;
AllocVar(sl);
sl->chrom = cloneString(row[0]);
sl->chromStart = atoi(row[1]);
sl->chromEnd = atoi(row[2]);
sl->name = strndup(row[3],6);
sl->size = atoi(row[2])-atoi(row[1]);
slAddHead(&list, sl);
}
lineFileClose(&IN);
}
else
{
struct sqlConnection *conn = sqlConnect("hg17");
char query[1024];
char **row;
struct sqlResult *sr = NULL;
safef(query, sizeof(query),
"select distinct chrom, chromStart, chromEnd, name "
"from %s order by name", regionSource);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
{
if (sameString(row[0], "chrom"))
continue;
if (excludeRandoms && (startsWith(row[0], "chrUn") || endsWith(row[0], "random")))
continue;
AllocVar(sl);
sl->chrom = cloneString(row[0]);
sl->chromStart = atoi(row[1]);
sl->chromEnd = atoi(row[2]);
sl->name = strndup(row[3],6);
sl->size = atoi(row[2])-atoi(row[1]);
slAddHead(&list, sl);
}
sqlFreeResult(&sr);
sqlDisconnect(&conn);
}
slReverse(&list);
return list;
}
struct sizeList *sizeListNew(char *chrom, int chromStart, int chromEnd, char *name)
/* Allocate a sizeList element holding private copies of chrom and name
 * together with the given coordinates.  No other field is assigned here. */
{
struct sizeList *el = needMem(sizeof(*el));
char *chromCopy = strdup(chrom);
char *nameCopy = strdup(name);
el->chromStart = chromStart;
el->chromEnd = chromEnd;
el->chrom = chromCopy;
el->name = nameCopy;
return el;
}
struct sizeList *sizeListClone(struct sizeList *list)
/* Return a copy of list, in the same order, built with sizeListNew from each
 * element's chrom, chromStart, chromEnd and name.  A NULL list yields NULL. */
{
struct sizeList *head = NULL;
struct sizeList **tailPt = &head;   /* where the next copy gets linked in */
struct sizeList *src;
for (src = list; src != NULL; src = src->next)
    {
    struct sizeList *copy = sizeListNew(src->chrom, src->chromStart, src->chromEnd, src->name);
    copy->next = NULL;
    *tailPt = copy;
    tailPt = &copy->next;
    }
return head;
}
struct sizeList *unionSizeLists(struct sizeList *a, struct sizeList *b, FILE *err)
/* Merge the regions of b into a copy of a and return the result.
 * An element of b is folded into an element of a when the first 6 characters
 * of their names match, they are on the same chrom, and
 * rangeIntersection() + mergeGaps is positive (presumably: the ranges overlap
 * or lie less than mergeGaps bases apart - confirm against rangeIntersection).
 * The surviving element is widened to cover both ranges.  Passes repeat until
 * nothing more merges, since a widened range may reach further elements.
 * Elements of b that never merge are appended to the result and also written
 * to err as BED 4 lines.
 * Neither a nor b is modified.  When a is NULL, b itself (not a copy) is
 * returned.  The working copy of b is not freed. */
{
struct sizeList *s, *t, *u, *c, *d;
boolean didChange = TRUE;
int mergeGaps = 20000;

/* Test before cloning so that no copy is made only to be dropped. */
if (a == NULL)
    return b;
c = sizeListClone(a);
d = sizeListClone(b);

while (didChange)
    {
    didChange = FALSE;
    for (s = c; s != NULL; s = s->next)
        {
        for (t = d; t != NULL; t = t->next)
            {
            /* Elements already merged are marked by NULL chrom/name. */
            if (t->chrom == NULL || t->name == NULL)
                continue;
            if (strncmp(s->name, t->name, 6) != 0 || !sameString(s->chrom, t->chrom))
                continue;
            if (rangeIntersection(s->chromStart, s->chromEnd, t->chromStart, t->chromEnd) + mergeGaps > 0)
                {
                s->chromStart = min(s->chromStart, t->chromStart);
                s->chromEnd = max(s->chromEnd, t->chromEnd);
                t->chrom = t->name = NULL;   /* mark as consumed */
                didChange = TRUE;
                }
            }
        }
    }

/* Whatever is left in d matched nothing: keep it and report it. */
for (t = d; t != NULL; t = t->next)
    {
    if (t->name != NULL && t->chrom != NULL)
        {
        u = sizeListNew(t->chrom, t->chromStart, t->chromEnd, t->name);
        slAddTail(&c, u);   /* slAddTail wants the address of the list pointer */
        fprintf(err, "%s\t%d\t%d\t%s\n", t->chrom, t->chromStart, t->chromEnd, t->name);
        }
    }
return c;
}
void writeSizeListToBedFile(FILE *File, struct sizeList *sList)
/* Write every element of sList to File as a tab-separated BED 4 line
 * (chrom, chromStart, chromEnd, name).
 * Names ending in "+" or "-" are first passed to chopSuffixAt(name, '_'),
 * which edits the name stored in the list element itself, so the list is
 * modified as a side effect. */
{
struct sizeList *sl;
for (sl = sList; sl != NULL; sl = sl->next)
    {
    if (endsWith(sl->name, "+") || endsWith(sl->name, "-"))
        chopSuffixAt(sl->name, '_');
    fprintf(File, "%s\t%d\t%d\t%s\n", sl->chrom, sl->chromStart, sl->chromEnd, sl->name);
    }
}
int main(int argc, char *argv[])
/* Read two orthology region sets, merge them into a consensus list and write
 * it as BED.  Arguments: ortho1 (liftOver), ortho2 (Mercator), consensus
 * output file, and the file receiving regions that matched nothing.
 * Returns 0 on success, 1 if an output file cannot be closed cleanly. */
{
char *ortho1;
char *ortho2;
char *consensus;
char *err;
struct sizeList *ortho1List = NULL;
struct sizeList *ortho2List = NULL;
struct sizeList *consensusList = NULL;
FILE *consensusFile = NULL;
FILE *errFile = NULL;

if (argc != 5)
    usage();
ortho1 = argv[1];      // liftOver
ortho2 = argv[2];      // Mercator
consensus = argv[3];   // Consensus
err = argv[4];         // errors

ortho1List = getRegions(ortho1, FALSE);   // liftOver - include random chroms
ortho2List = getRegions(ortho2, TRUE);    // Mercator - exclude random chroms
consensusFile = mustOpen(consensus, "w");
errFile = mustOpen(err, "w");

/* unionSizeLists only folds its second list into its first, so each input is
 * merged in again to pick up regions that became mergeable after widening. */
consensusList = unionSizeLists(ortho1List, ortho2List, errFile);
consensusList = unionSizeLists(consensusList, ortho1List, errFile);
consensusList = unionSizeLists(consensusList, ortho2List, errFile);
writeSizeListToBedFile(consensusFile, consensusList);

/* Close explicitly so a failed flush of buffered output is reported rather
 * than lost at exit.  The second close is skipped if both names resolved to
 * the same stream. */
if (fclose(consensusFile) != 0)
    {
    perror(consensus);
    return 1;
    }
if (errFile != consensusFile && fclose(errFile) != 0)
    {
    perror(err);
    return 1;
    }
return 0;
}