/* splice.c was originally generated by the autoSql program, which also
 * generated splice.h and splice.sql.  This module links the database and
 * the RAM representation of objects. */

#include "common.h"
#include "linefile.h"
#include "dystring.h"
#include "jksql.h"
#include "splice.h"
#include "geneGraph.h"
#include "dystring.h"

struct path *pathCommaIn(char **pS, struct path *ret)
/* Create a path out of a comma separated string.
 * This will fill in ret if non-null, otherwise will
 * return a new path.  *pS is advanced past the text that was parsed. */
{
char *s = *pS;
int i;

if (ret == NULL)
    AllocVar(ret);
ret->tName = sqlStringComma(&s);
ret->tStart = sqlSignedComma(&s);
ret->tEnd = sqlSignedComma(&s);
ret->type = sqlSignedComma(&s);
ret->maxVCount = sqlSignedComma(&s);
ret->vCount = sqlSignedComma(&s);
/* The vertex list is a brace-enclosed, comma separated array of vCount ints. */
s = sqlEatChar(s, '{');
AllocArray(ret->vertices, ret->vCount);
for (i = 0; i < ret->vCount; ++i)
    {
    ret->vertices[i] = sqlSignedComma(&s);
    }
s = sqlEatChar(s, '}');
s = sqlEatChar(s, ',');
ret->upV = sqlSignedComma(&s);
ret->downV = sqlSignedComma(&s);
ret->bpCount = sqlSignedComma(&s);
*pS = s;
return ret;
}

void pathFree(struct path **pEl)
/* Free a single dynamically allocated path such as created
 * with pathLoad().  Does nothing if *pEl is NULL; otherwise frees the
 * owned tName and vertices members and then the struct itself. */
{
struct path *el;

if ((el = *pEl) == NULL)
    return;
freeMem(el->tName);
freeMem(el->vertices);
freez(pEl);
}

void pathFreeList(struct path **pList)
/* Free a list of dynamically allocated path's and set *pList to NULL. */
{
struct path *el, *next;

for (el = *pList; el != NULL; el = next)
    {
    /* Grab the successor before the element is released. */
    next = el->next;
    pathFree(&el);
    }
*pList = NULL;
}

void pathOutput(struct path *el, FILE *f, char sep, char lastSep)
/* Print out path.  Separate fields with sep. Follow last field with lastSep.
 * When sep is ',' strings are quoted and the vertex array is wrapped in
 * braces; array elements are always followed by a comma. */
{
int i;

if (sep == ',') fputc('"',f);
fprintf(f, "%s", el->tName);
if (sep == ',') fputc('"',f);
fputc(sep,f);
fprintf(f, "%d", el->tStart);
fputc(sep,f);
fprintf(f, "%d", el->tEnd);
fputc(sep,f);
fprintf(f, "%d", el->type);
fputc(sep,f);
fprintf(f, "%d", el->maxVCount);
fputc(sep,f);
fprintf(f, "%d", el->vCount);
fputc(sep,f);
if (sep == ',') fputc('{',f);
for (i = 0; i < el->vCount; ++i)
    {
    fprintf(f, "%d", el->vertices[i]);
    fputc(',', f);
    }
if (sep == ',') fputc('}',f);
fputc(sep,f);
fprintf(f, "%d", el->upV);
fputc(sep,f);
fprintf(f, "%d", el->downV);
fputc(sep,f);
fprintf(f, "%d", el->bpCount);
fputc(lastSep,f);
}

struct splice *spliceLoad(char **row)
/* Load a splice from row fetched with select * from splice
 * from database.  Dispose of this with spliceFree().
 * row must hold the 12 columns read below (row[0] .. row[11]). */
{
struct splice *ret;
int sizeOne,i;
char *s;

AllocVar(ret);
/* Counts are read first so the array sizes parsed below can be checked. */
ret->vCount = sqlSigned(row[7]);
ret->pathCount = sqlSigned(row[10]);
ret->tName = cloneString(row[0]);
ret->tStart = sqlSigned(row[1]);
ret->tEnd = sqlSigned(row[2]);
ret->name = cloneString(row[3]);
ret->type = sqlSigned(row[4]);
/* strand is a fixed-size field, so use a bounded copy rather than strcpy(). */
safef(ret->strand, sizeof(ret->strand), "%s", row[5]);
ret->agxId = sqlSigned(row[6]);
sqlSignedDynamicArray(row[8], &ret->vPositions, &sizeOne);
assert(sizeOne == ret->vCount);
sqlUbyteDynamicArray(row[9], &ret->vTypes, &sizeOne);
assert(sizeOne == ret->vCount);
/* Paths are stored as pathCount brace-enclosed records, each followed by a comma. */
s = row[11];
for (i = 0; i < ret->pathCount; ++i)
    {
    s = sqlEatChar(s, '{');
    slSafeAddHead(&ret->paths, pathCommaIn(&s, NULL));
    s = sqlEatChar(s, '}');
    s = sqlEatChar(s, ',');
    }
/* Elements were pushed on the head, so reverse to restore input order. */
slReverse(&ret->paths);
return ret;
}

struct splice *spliceLoadAll(char *fileName)
/* Load all splice from a whitespace-separated file.
 * Dispose of this with spliceFreeList(). */
{
struct splice *list = NULL, *el;
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *row[12];

while (lineFileRow(lf, row))
    {
    el = spliceLoad(row);
    slAddHead(&list, el);
    }
lineFileClose(&lf);
/* Records were pushed on the head, so reverse to restore file order. */
slReverse(&list);
return list;
}

struct splice *spliceLoadAllByChar(char *fileName, char chopper)
/* Load all splice from a chopper separated file.
 * Dispose of this with spliceFreeList(). */
{
struct splice *list = NULL, *el;
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *row[12];

while (lineFileNextCharRow(lf, chopper, row, ArraySize(row)))
    {
    el = spliceLoad(row);
    slAddHead(&list, el);
    }
lineFileClose(&lf);
/* Records were pushed on the head, so reverse to restore file order. */
slReverse(&list);
return list;
}

struct splice *spliceCommaIn(char **pS, struct splice *ret)
/* Create a splice out of a comma separated string.
 * This will fill in ret if non-null, otherwise will
 * return a new splice.  *pS is advanced past the text that was parsed. */
{
char *s = *pS;
int i;

if (ret == NULL)
    AllocVar(ret);
ret->tName = sqlStringComma(&s);
ret->tStart = sqlSignedComma(&s);
ret->tEnd = sqlSignedComma(&s);
ret->name = sqlStringComma(&s);
ret->type = sqlSignedComma(&s);
sqlFixedStringComma(&s, ret->strand, sizeof(ret->strand));
ret->agxId = sqlSignedComma(&s);
ret->vCount = sqlSignedComma(&s);
/* Vertex positions: brace-enclosed array of vCount signed ints. */
s = sqlEatChar(s, '{');
AllocArray(ret->vPositions, ret->vCount);
for (i = 0; i < ret->vCount; ++i)
    {
    ret->vPositions[i] = sqlSignedComma(&s);
    }
s = sqlEatChar(s, '}');
s = sqlEatChar(s, ',');
/* Vertex types: brace-enclosed array of vCount unsigned values. */
s = sqlEatChar(s, '{');
AllocArray(ret->vTypes, ret->vCount);
for (i = 0; i < ret->vCount; ++i)
    {
    ret->vTypes[i] = sqlUnsignedComma(&s);
    }
s = sqlEatChar(s, '}');
s = sqlEatChar(s, ',');
ret->pathCount = sqlSignedComma(&s);
/* Path list: an outer brace pair around pathCount brace-enclosed records. */
s = sqlEatChar(s, '{');
for (i = 0; i < ret->pathCount; ++i)
    {
    s = sqlEatChar(s, '{');
    /* An empty record "{}" contributes no path to the list. */
    if(s[0] != '}')
        slSafeAddHead(&ret->paths, pathCommaIn(&s,NULL));
    s = sqlEatChar(s, '}');
    s = sqlEatChar(s, ',');
    }
slReverse(&ret->paths);
s = sqlEatChar(s, '}');
s = sqlEatChar(s, ',');
*pS = s;
return ret;
}

void spliceFree(struct splice **pEl)
/* Free a single dynamically allocated splice such as created
 * with spliceLoad().  Does nothing if *pEl is NULL; otherwise frees the
 * owned strings, arrays and path list and then the struct itself. */
{
struct splice *el;

if ((el = *pEl) == NULL)
    return;
freeMem(el->tName);
freeMem(el->name);
freeMem(el->vPositions);
freeMem(el->vTypes);
pathFreeList(&el->paths);
freez(pEl);
}

void spliceFreeList(struct splice **pList)
/* Free a list of dynamically allocated splice's and set *pList to NULL. */
{
struct splice *el, *next;

for (el = *pList; el != NULL; el = next)
    {
    /* Grab the successor before the element is released. */
    next = el->next;
    spliceFree(&el);
    }
*pList = NULL;
}

void spliceOutput(struct splice *el, FILE *f, char sep, char lastSep)
/* Print out splice.  Separate fields with sep. Follow last field with lastSep.
 * When sep is ',' strings are quoted and arrays/lists are wrapped in braces;
 * array elements are always followed by a comma. */
{
int i;

if (sep == ',') fputc('"',f);
fprintf(f, "%s", el->tName);
if (sep == ',') fputc('"',f);
fputc(sep,f);
fprintf(f, "%d", el->tStart);
fputc(sep,f);
fprintf(f, "%d", el->tEnd);
fputc(sep,f);
if (sep == ',') fputc('"',f);
fprintf(f, "%s", el->name);
if (sep == ',') fputc('"',f);
fputc(sep,f);
fprintf(f, "%d", el->type);
fputc(sep,f);
if (sep == ',') fputc('"',f);
fprintf(f, "%s", el->strand);
if (sep == ',') fputc('"',f);
fputc(sep,f);
fprintf(f, "%d", el->agxId);
fputc(sep,f);
fprintf(f, "%d", el->vCount);
fputc(sep,f);
if (sep == ',') fputc('{',f);
for (i = 0; i < el->vCount; ++i)
    {
    fprintf(f, "%d", el->vPositions[i]);
    fputc(',', f);
    }
if (sep == ',') fputc('}',f);
fputc(sep,f);
if (sep == ',') fputc('{',f);
for (i = 0; i < el->vCount; ++i)
    {
    fprintf(f, "%u", el->vTypes[i]);
    fputc(',', f);
    }
if (sep == ',') fputc('}',f);
fputc(sep,f);
fprintf(f, "%d", el->pathCount);
fputc(sep,f);
/* Writing path list: pathCount records, each wrapped in braces. */
    {
    struct path *it = el->paths;
    if (sep == ',') fputc('{',f);
    for (i = 0; i < el->pathCount; ++i)
        {
        fputc('{',f);
        pathCommaOut(it,f);
        it = it->next;
        fputc('}',f);
        fputc(',',f);
        }
    if (sep == ',') fputc('}',f);
    }
fputc(lastSep,f);
}

/* -------------------------------- End autoSql Generated Code -------------------------------- */

enum ggEdgeType pathEdgeType(unsigned char *vTypes, int v1, int v2)
/* Return edge type for the edge from vertex v1 to vertex v2:
 * start -> end is ggExon, end -> start is ggSJ, anything else is ggIntron.
 * vTypes is indexed directly by v1 and v2, so both must be valid indexes. */
{
boolean v1IsStart = (vTypes[v1] == ggHardStart || vTypes[v1] == ggSoftStart);
boolean v1IsEnd = (vTypes[v1] == ggHardEnd || vTypes[v1] == ggSoftEnd);
boolean v2IsStart = (vTypes[v2] == ggHardStart || vTypes[v2] == ggSoftStart);
boolean v2IsEnd = (vTypes[v2] == ggHardEnd || vTypes[v2] == ggSoftEnd);

if (v1IsStart && v2IsEnd)
    return ggExon;
else if (v1IsEnd && v2IsStart)
    return ggSJ;
else
    return ggIntron;
}

static void pathBedAddBlock(struct bed *bed, int start, int size, int end)
/* Append one block [start, start+size) to bed and widen the bed's
 * chrom/thick range so that it covers start and end.  chromStarts are
 * stored as absolute positions here; the caller makes them relative. */
{
bed->blockSizes[bed->blockCount] = size;
bed->chromStarts[bed->blockCount] = start;
bed->chromStart = bed->thickStart = min(bed->chromStart, start);
bed->chromEnd = bed->thickEnd = max(bed->chromEnd, end);
bed->blockCount++;
}

struct bed *pathToBed(struct path *path, struct splice *splice,
                      int source, int sink, boolean spoofEnds)
/* Construct a bed for the path.  If spoofEnds is TRUE, ensure that there
 * is at least a 1bp exon at splice sites.
 * Each exon edge of the path becomes one block.  The bed name is
 * "<splice name>.<path index>.<vertex list>" with an "-Ex" / "-Inc" suffix
 * for the first / second path of alt5Prime, alt3Prime, altRetInt and
 * altCassette splices.  Returns NULL if the path yields no blocks;
 * otherwise the result should be disposed of with bedFree(). */
{
struct bed *bed = NULL;
int vertIx = 0;
int *verts = path->vertices;
int *vPos = splice->vPositions;
unsigned char *vTypes = splice->vTypes;
int i = 0;
int pathIx = slIxFromElement(splice->paths, path);
struct dyString *buff = newDyString(256);

AllocVar(bed);
bed->chrom = cloneString(splice->tName);
bed->chromStart = BIGNUM;
bed->chromEnd = 0;
safef(bed->strand, sizeof(bed->strand), "%s", splice->strand);
bed->score = splice->type;
AllocArray(bed->chromStarts, path->vCount);
AllocArray(bed->blockSizes, path->vCount);

/* NOTE(review): the "<= splice->vCount" tests below are kept as in the
 * original; whether vCount itself is a legal vertex index depends on how
 * callers number source and sink -- confirm against callers. */

/* If necessary tack on a fake exon.  Needs at least two vertices, since
 * both verts[0] and verts[1] are examined. */
if(spoofEnds && path->vCount >= 2 &&
   verts[vertIx] != source && verts[vertIx+1] <= splice->vCount &&
   pathEdgeType(vTypes, verts[vertIx], verts[vertIx+1]) != ggExon)
    {
    pathBedAddBlock(bed, vPos[verts[vertIx]] - 1, 1, vPos[verts[vertIx+1]]);
    }

/* For each edge that is an exon count up the base pairs. */
for(vertIx = 0; vertIx < path->vCount - 1; vertIx++)
    {
    if(verts[vertIx] != source && verts[vertIx] <= splice->vCount)
        {
        /* If exon add up the base pairs. */
        if(pathEdgeType(vTypes, verts[vertIx], verts[vertIx+1]) == ggExon)
            {
            int exonStart = vPos[verts[vertIx]];
            int exonEnd = vPos[verts[vertIx+1]];
            pathBedAddBlock(bed, exonStart, exonEnd - exonStart, exonEnd);
            }
        }
    }

/* If spoofing ends tack on a 1bp exon as necessary.  Guarded so that
 * vertIx is never negative for paths with fewer than two vertices. */
vertIx = path->vCount - 2;
if(spoofEnds && path->vCount >= 2 &&
   verts[vertIx] != source && verts[vertIx+1] <= splice->vCount &&
   pathEdgeType(vTypes, verts[vertIx], verts[vertIx+1]) != ggExon)
    {
    pathBedAddBlock(bed, vPos[verts[vertIx+1]], 1, vPos[verts[vertIx+1]] + 1);
    }

/* Fix up the name and adjust the chromStarts. */
dyStringPrintf(buff, "%s.%d.", splice->name, pathIx);
for(i = 0; i < path->vCount; i++)
    {
    if(path->vertices[i] != sink && path->vertices[i] <= splice->vCount)
        dyStringPrintf(buff, "%d,", path->vertices[i]);
    }
if(splice->type == alt5Prime || splice->type == alt3Prime ||
   splice->type == altRetInt || splice->type == altCassette)
    {
    if(pathIx == 0)
        dyStringPrintf(buff, "-Ex");
    else if(pathIx == 1)
        dyStringPrintf(buff, "-Inc");
    }
bed->name = cloneString(buff->string);

/* Block starts were recorded as absolute positions; make them relative. */
for(i = 0; i < bed->blockCount; i++)
    bed->chromStarts[i] -= bed->chromStart;

/* If we don't have any blocks, quit now. */
if(bed->blockCount == 0)
    bedFree(&bed);
dyStringFree(&buff);
return bed;
}