/* hgLiftOver - CGI-script to convert coordinates using chain files */ #include "common.h" #include "errabort.h" #include "hCommon.h" #include "jksql.h" #include "portable.h" #include "linefile.h" #include "dnautil.h" #include "fa.h" #include "cheapcgi.h" #include "htmshell.h" #include "hdb.h" #include "hui.h" #include "cart.h" #include "web.h" #include "hash.h" #include "liftOver.h" #include "liftOverChain.h" /* CGI Variables */ #define HGLFT_USERDATA_VAR "hglft_userData" /* typed/pasted in data */ #define HGLFT_DATAFILE_VAR "hglft_dataFile" /* file of data to convert */ #define HGLFT_FROMORG_VAR "hglft_fromOrg" /* FROM organism */ #define HGLFT_FROMDB_VAR "hglft_fromDb" /* FROM assembly */ #define HGLFT_TOORG_VAR "hglft_toOrg" /* TO organism */ #define HGLFT_TODB_VAR "hglft_toDb" /* TO assembly */ #define HGLFT_ERRORHELP_VAR "hglft_errorHelp" /* Print explanatory text */ #define HGLFT_REFRESHONLY_VAR "hglft_doRefreshOnly" /* Just refresh drop-down lists */ #define HGLFT_LAST_CHAIN "hglft_lastChain" /* liftOver options: */ #define HGLFT_MINMATCH "hglft_minMatch" #define HGLFT_MINSIZEQ "hglft_minSizeQ" #define HGLFT_MINCHAINT "hglft_minChainT" #define HGLFT_MULTIPLE "hglft_multiple" #define HGLFT_MINBLOCKS "hglft_minBlocks" #define HGLFT_FUDGETHICK "hglft_fudgeThick" /* Global Variables */ struct cart *cart; /* CGI and other variables */ struct hash *oldVars = NULL; /* Filename prefix */ #define HGLFT "hglft" /* Javascript to support New Assembly pulldown when Orig Assembly changes */ /* Copies selected value from the Original Assembly pulldown to a hidden form */ char *onChange = "onchange=\"document.mainForm." HGLFT_REFRESHONLY_VAR ".value = 1;" "document.mainForm.submit();\""; char *chainStringVal(struct liftOverChain *chain) /* keep the last chain in memory in this format */ { char chainS[64]; safef(chainS, sizeof(chainS), "%s.%s", chain->fromDb, chain->toDb); return cloneString(chainS); } void webMain(struct liftOverChain *chain, boolean multiple, boolean keepSettings, int minSizeQ, int minChainT, float minBlocks, float minMatch, boolean fudgeThick) /* set up page for entering data */ { struct dbDb *dbList; char *fromOrg = hArchiveOrganism(chain->fromDb), *toOrg = hArchiveOrganism(chain->toDb); char *chainString = chainStringVal(chain); cgiParagraph( "This tool converts genome coordinates and genome annotation files " "between assemblies.  " "The input data can be pasted into the text box, or uploaded from a file.  " "If a pair of assemblies cannot be selected from the pull-down menus," " a direct lift between them is unavailable.  " "However, a sequential lift may be possible.  " "Example: lift from Mouse, May 2004, to Mouse, Feb. 2006, and then from Mouse, " "Feb. 2006 to Mouse, July 2007 to achieve a lift from mm5 to mm9.  " ""); /* create HMTL form */ puts("
\n"); cartSaveSession(cart); /* create HTML table for layout purposes */ puts("\n\n"); /* top two rows -- genome and assembly menus */ cgiSimpleTableRowStart(); cgiTableField("Original Genome: "); cgiTableField("Original Assembly: "); cgiTableField("New Genome: "); cgiTableField("New Assembly: "); cgiTableRowEnd(); cgiSimpleTableRowStart(); /* genome */ cgiSimpleTableFieldStart(); dbList = hGetLiftOverFromDatabases(); printSomeGenomeListHtmlNamed(HGLFT_FROMORG_VAR, chain->fromDb, dbList, onChange); cgiTableFieldEnd(); /* from assembly */ cgiSimpleTableFieldStart(); printAllAssemblyListHtmlParm(chain->fromDb, dbList, HGLFT_FROMDB_VAR, TRUE, onChange); cgiTableFieldEnd(); /* to assembly */ cgiSimpleTableFieldStart(); dbDbFreeList(&dbList); dbList = hGetLiftOverToDatabases(chain->fromDb); printLiftOverGenomeList(HGLFT_TOORG_VAR, chain->toDb, dbList, onChange); cgiTableFieldEnd(); cgiSimpleTableFieldStart(); printAllAssemblyListHtmlParm(chain->toDb, dbList, HGLFT_TODB_VAR, TRUE, ""); cgiTableFieldEnd(); cgiTableRowEnd(); cgiTableEnd(); cgiParagraph(" "); cgiSimpleTableStart(); cgiSimpleTableRowStart(); cgiTableField("Minimum ratio of bases that must remap:"); cgiSimpleTableFieldStart(); cgiMakeDoubleVar(HGLFT_MINMATCH, (keepSettings) ? minMatch : chain->minMatch,6); cgiTableFieldEnd(); cgiTableRowEnd(); cgiSimpleTableRowStart(); cgiTableField(" "); cgiTableRowEnd(); cgiSimpleTableRowStart(); cgiTableField("BED 4 to BED 6 Options"); cgiTableRowEnd(); cgiSimpleTableRowStart(); cgiTableField("Allow multiple output regions:"); cgiSimpleTableFieldStart(); cgiMakeCheckBox(HGLFT_MULTIPLE,multiple); cgiTableFieldEnd(); cgiTableRowEnd(); cgiSimpleTableRowStart(); cgiTableField("  Minimum hit size in query:"); cgiSimpleTableFieldStart(); cgiMakeIntVar(HGLFT_MINSIZEQ,(keepSettings) ? minSizeQ : chain->minSizeQ,4); cgiTableFieldEnd(); cgiTableRowEnd(); cgiSimpleTableRowStart(); cgiTableField("  Minimum chain size in target:"); cgiSimpleTableFieldStart(); cgiMakeIntVar(HGLFT_MINCHAINT,(keepSettings) ? minChainT : chain->minChainT,4); cgiTableFieldEnd(); cgiTableRowEnd(); cgiSimpleTableRowStart(); cgiTableField(" "); cgiTableRowEnd(); cgiSimpleTableRowStart(); cgiTableField("BED 12 Options"); cgiTableRowEnd(); cgiSimpleTableRowStart(); cgiTableField("Min ratio of alignment blocks or exons that must map:"); cgiSimpleTableFieldStart(); cgiMakeDoubleVar(HGLFT_MINBLOCKS,(keepSettings) ? minBlocks : chain->minBlocks,6); cgiTableFieldEnd(); cgiTableRowEnd(); cgiSimpleTableRowStart(); cgiTableField("If thickStart/thickEnd is not mapped, use the closest mapped base:"); cgiSimpleTableFieldStart(); cgiMakeCheckBox(HGLFT_FUDGETHICK,(keepSettings) ? fudgeThick : (chain->fudgeThick[0]=='Y')); cgiTableFieldEnd(); cgiTableRowEnd(); cgiTableEnd(); /* text box and two buttons (submit, reset) */ cgiParagraph(" Paste in data (BED or chrN:start-end formats):\n"); cgiSimpleTableStart(); cgiSimpleTableRowStart(); cgiSimpleTableFieldStart(); cgiMakeTextArea(HGLFT_USERDATA_VAR, cartCgiUsualString(cart, HGLFT_USERDATA_VAR, NULL), 10, 80); cgiTableFieldEnd(); /* right element of table is a nested table * with two buttons stacked on top of each other */ cgiSimpleTableFieldStart(); cgiSimpleTableStart(); cgiSimpleTableRowStart(); cgiSimpleTableFieldStart(); cgiMakeSubmitButton(); cgiTableFieldEnd(); cgiTableRowEnd(); cgiSimpleTableRowStart(); cgiSimpleTableFieldStart(); cgiMakeClearButton("mainForm", HGLFT_USERDATA_VAR); cgiTableFieldEnd(); cgiTableRowEnd(); cgiTableEnd(); cgiTableFieldEnd(); cgiTableRowEnd(); cgiTableEnd(); /* next row -- file upload controls */ cgiParagraph(" Or upload data from a file (BED or chrN:start-end in plain text format):"); cgiSimpleTableStart(); cgiSimpleTableRowStart(); printf("\n", HGLFT_DATAFILE_VAR); puts("\n"); cgiTableRowEnd(); cgiTableEnd(); printf("\n", HGLFT_REFRESHONLY_VAR); printf("\n", HGLFT_LAST_CHAIN, chainString); cartSaveSession(cart); puts(""); freeMem(fromOrg); freeMem(toOrg); } void webParamsUsed(float minMatch, boolean multiple, int minSizeQ, int minChainT, float minBlocks, boolean fudgeThick) { webNewSection("Parameters Used"); cgiSimpleTableStart(); cgiSimpleTableRowStart(); cgiTableField("Minimum ratio of bases that must remap:"); cgiSimpleTableFieldStart(); printf("%.2f",minMatch); cgiTableFieldEnd(); cgiTableRowEnd(); cgiSimpleTableRowStart(); cgiTableField(" "); cgiTableRowEnd(); cgiSimpleTableRowStart(); cgiTableField("BED 4 to BED 6 Options"); cgiTableRowEnd(); cgiSimpleTableRowStart(); cgiTableField("Allow multiple output regions:"); cgiSimpleTableFieldStart(); printf("%s", multiple ? "on" : "off"); cgiTableFieldEnd(); cgiTableRowEnd(); cgiSimpleTableRowStart(); cgiTableField("  Minimum hit size in query:"); cgiSimpleTableFieldStart(); printf("%d",minSizeQ); cgiTableFieldEnd(); cgiTableRowEnd(); cgiSimpleTableRowStart(); cgiTableField("  Minimum chain size in target:"); cgiSimpleTableFieldStart(); printf("%d",minChainT); cgiTableFieldEnd(); cgiTableRowEnd(); cgiSimpleTableRowStart(); cgiTableField(" "); cgiTableRowEnd(); cgiSimpleTableRowStart(); cgiTableField("BED 12 Options"); cgiTableRowEnd(); cgiSimpleTableRowStart(); cgiTableField("Min ratio of alignment blocks or exons that must map:"); cgiSimpleTableFieldStart(); printf("%.2f",minBlocks); cgiTableFieldEnd(); cgiTableRowEnd(); cgiSimpleTableRowStart(); cgiTableField("If thickStart/thickEnd is not mapped, use the closest mapped base:"); cgiSimpleTableFieldStart(); printf("%s", fudgeThick ? "on" : "off"); cgiTableFieldEnd(); cgiTableRowEnd(); cgiTableEnd(); } void webDownloads() { webNewSection("Command Line Tool"); cgiParagraph( "To lift genome annotations locally on Linux systems, download the " "" "liftOver executable and the appropriate " "" "chain file." " Run liftOver with no arguments to see the usage message.\n"); } double scoreLiftOverChain(struct liftOverChain *chain, char *fromOrg, char *fromDb, char *toOrg, char *toDb, char *cartOrg, char *cartDb, struct hash *dbRank, struct hash *dbHash) /* Score the chain in terms of best match for cart settings */ { double score = 0; struct dbDb *chainFromDbDb = hashFindVal(dbHash, chain->fromDb); struct dbDb *chainToDbDb = hashFindVal(dbHash, chain->toDb); char *chainFromOrg = (chainFromDbDb) ? chainFromDbDb->organism : NULL; char *chainToOrg = (chainToDbDb) ? chainToDbDb->organism : NULL; int fromRank = hashIntValDefault(dbRank, chain->fromDb, 0); /* values up to approx. #assemblies */ int toRank = hashIntValDefault(dbRank, chain->toDb, 0); int maxRank = hashIntVal(dbRank, "maxRank"); if (!chainFromOrg || !chainToOrg) return 0; if (sameOk(fromOrg,chainFromOrg) && sameOk(fromDb,chain->fromDb) && sameOk(toOrg,chainToOrg) && sameOk(toDb,chain->toDb)) score += 10000000; if (sameOk(fromOrg,chainFromOrg)) score += 2000000; if (sameOk(fromDb,chain->fromDb)) score += 1000000; if (sameOk(toOrg,chainToOrg)) score += 200000; if (sameOk(toDb,chain->toDb)) score += 100000; if (sameOk(cartDb,chain->fromDb)) score += 20000; if (sameOk(cartDb,chain->toDb)) score += 10000; if (sameOk(cartOrg,chainFromOrg)) score += 2000; if (sameOk(cartOrg,chainToOrg)) score += 1000; score += 10*(maxRank-fromRank); score += (maxRank - toRank); return score; } struct liftOverChain *defaultChoices(struct liftOverChain *chainList, char *cartDb) /* Out of a list of liftOverChains and a cart, choose a * list to display. */ { char *fromOrg, *fromDb, *toOrg, *toDb, *cartOrg; struct liftOverChain *choice = NULL; struct hash *dbRank = hGetDatabaseRank(); struct hash *dbDbHash = hDbDbAndArchiveHash(); double bestScore = -1; struct liftOverChain *this = NULL; /* Get the initial values. */ fromOrg = cartCgiUsualString(cart, HGLFT_FROMORG_VAR, "0"); fromDb = cartCgiUsualString(cart, HGLFT_FROMDB_VAR, "0"); toOrg = cartCgiUsualString(cart, HGLFT_TOORG_VAR, "0"); toDb = cartCgiUsualString(cart, HGLFT_TODB_VAR, "0"); cartOrg = hArchiveOrganism(cartDb); if (sameWord(fromOrg,"0")) fromOrg = NULL; if (sameWord(fromDb,"0")) fromDb = NULL; if (sameWord(toOrg,"0")) toOrg = NULL; if (sameWord(toDb,"0")) toDb = NULL; if (sameWord(cartDb,"0")) cartDb = NULL; for (this = chainList; this != NULL; this = this->next) { double score = scoreLiftOverChain(this, fromOrg, fromDb, toOrg, toDb, cartOrg, cartDb, dbRank, dbDbHash); if (score > bestScore) { choice = this; bestScore = score; } } return choice; } void doMiddle(struct cart *theCart) /* Set up globals and make web page */ { char *userData; char *organism; char *db; float minBlocks, minMatch; boolean multiple, fudgeThick; int minSizeQ, minChainT; boolean refreshOnly = FALSE; boolean keepSettings = FALSE; char *thisChain = NULL; char *lastChain = NULL; struct liftOverChain *chainList = NULL, *choice; cart = theCart; if (cgiOptionalString(HGLFT_ERRORHELP_VAR)) { puts("
");
    puts(liftOverErrHelp());
    puts("
"); return; } /* Get data to convert - from userData variable, or if * that is empty from a file. */ if (cartOptionalString(cart, "SubmitFile")) userData = cartOptionalString(cart, HGLFT_DATAFILE_VAR); else userData = cartOptionalString(cart, HGLFT_USERDATA_VAR); cartWebStart(cart, NULL, "Lift Genome Annotations"); getDbAndGenome(cart, &db, &organism, oldVars); chainList = liftOverChainList(); choice = defaultChoices(chainList, db); thisChain = chainStringVal(choice); if (choice == NULL) errAbort("Sorry, no conversions available from this assembly\n"); minSizeQ = cartCgiUsualInt(cart, HGLFT_MINSIZEQ, choice->minSizeQ); minChainT = cartCgiUsualInt(cart, HGLFT_MINCHAINT, choice->minChainT); minBlocks = cartCgiUsualDouble(cart, HGLFT_MINBLOCKS, choice->minBlocks); minMatch = cartCgiUsualDouble(cart, HGLFT_MINMATCH, choice->minMatch); fudgeThick = cartCgiUsualBoolean(cart, HGLFT_FUDGETHICK, (choice->fudgeThick[0]=='Y') ? TRUE : FALSE); multiple = cartCgiUsualBoolean(cart, HGLFT_MULTIPLE, (choice->multiple[0]=='Y') ? TRUE : FALSE); refreshOnly = cartCgiUsualInt(cart, HGLFT_REFRESHONLY_VAR, 0); lastChain = cartCgiUsualString(cart, HGLFT_LAST_CHAIN, NULL); if (lastChain && thisChain && sameString(lastChain, thisChain)) keepSettings = TRUE; webMain(choice, multiple, keepSettings, minSizeQ, minChainT, minBlocks, minMatch, fudgeThick); liftOverChainFreeList(&chainList); if (!refreshOnly && userData != NULL && userData[0] != '\0') { struct hash *chainHash = newHash(0); char *chainFile; struct tempName oldTn, mappedTn, unmappedTn; FILE *old, *mapped, *unmapped; char *line; int lineSize; char *fromDb, *toDb; int ct = -1, errCt = 0; enum liftOverFileType lft; /* read in user data and save to file */ makeTempName(&oldTn, HGLFT, ".user"); old = mustOpen(oldTn.forCgi, "w"); fputs(userData, old); fputs("\n", old); /* in case user doesn't end last line */ carefulClose(&old); chmod(oldTn.forCgi, 0666); /* setup output files -- one for converted lines, the other * for lines that could not be mapped */ makeTempName(&mappedTn, HGLFT, ".bed"); makeTempName(&unmappedTn, HGLFT, ".err"); mapped = mustOpen(mappedTn.forCgi, "w"); chmod(mappedTn.forCgi, 0666); unmapped = mustOpen(unmappedTn.forCgi, "w"); chmod(unmappedTn.forCgi, 0666); fromDb = cgiString(HGLFT_FROMDB_VAR); toDb = cgiString(HGLFT_TODB_VAR); chainFile = liftOverChainFile(fromDb, toDb); if (chainFile == NULL) errAbort("ERROR: Can't convert from %s to %s: no chain file loaded", fromDb, toDb); readLiftOverMap(chainFile, chainHash); lft = liftOverSniff(oldTn.forCgi); if (lft == bed) ct = liftOverBed(oldTn.forCgi, chainHash, minMatch, minBlocks, 0, minSizeQ, minChainT, 0, fudgeThick, mapped, unmapped, multiple, NULL, &errCt); else if (lft == positions) ct = liftOverPositions(oldTn.forCgi, chainHash, minMatch, minBlocks, 0, minSizeQ, minChainT, 0, fudgeThick, mapped, unmapped, multiple, NULL, &errCt); if (ct == -1) /* programming error */ errAbort("ERROR: Unsupported data format.\n"); webNewSection("Results"); if (ct > 0) { /* some records succesfully converted */ cgiParagraph(""); printf("Successfully converted %d record", ct); printf("%s: ", ct > 1 ? "s" : ""); printf("View Conversions\n", mappedTn.forCgi); } if (errCt) { /* some records not converted */ cgiParagraph(""); printf("Conversion failed on %d record", errCt); printf("%s.    ", errCt > 1 ? "s" : ""); printf("Display failure file   \n", unmappedTn.forCgi); printf("Explain failure messages\n", HGLFT_ERRORHELP_VAR); puts("

Failed input regions:\n"); struct lineFile *errFile = lineFileOpen(unmappedTn.forCgi, TRUE); puts("

\n");
        while (lineFileNext(errFile, &line, &lineSize))
            puts(line);
        lineFileClose(&errFile);
        puts("
\n"); } if ((multiple) && (lft == positions)) { puts("
\n");
	puts("Note: "multiple" option is not supported for position format.");
	puts("
\n"); } webParamsUsed(minMatch, multiple, minSizeQ, minChainT, minBlocks, fudgeThick); carefulClose(&unmapped); } webDownloads(); cartWebEnd(); } /* Null terminated list of CGI Variables we don't want to save * permanently. */ char *excludeVars[] = {"Submit", "submit", "SubmitFile", HGLFT_USERDATA_VAR, HGLFT_DATAFILE_VAR, HGLFT_ERRORHELP_VAR, NULL}; int main(int argc, char *argv[]) /* Process command line. */ { long enteredMainTime = clock1000(); oldVars = hashNew(10); cgiSpoof(&argc, argv); cartEmptyShell(doMiddle, hUserCookie(), excludeVars, oldVars); cgiExitTime("hgLiftOver", enteredMainTime); return 0; }