/* mdbPrint - Prints metadata objects and variables from the mdb metadata table. */
#include "common.h"
#include "linefile.h"
#include "dystring.h"
#include "options.h"
#include "mdb.h"

#define OBJTYPE_DEFAULT "table"

void usage()
/* Explain usage and exit (errAbort does not return). */
{
errAbort(
    "mdbPrint - Prints metadata objects, variables and values from '" MDB_DEFAULT_NAME "' table.\n"
    "usage:\n"
    " mdbPrint {db} [-table=] [-byVar] [-line/-count]\n"
    " [-all]\n"
    " [-vars=\"var1=val1 var2=val2...\"]\n"
    " [-obj= [-var= [-val=]]]\n"
    " [-var= [-val=]]\n"
    " [-specialHelp]\n"
    "Options:\n"
    " {db} Database to query metadata from. This argument is required.\n"
    " -table Table to query metadata from. Default is the sandbox version of\n"
    " '" MDB_DEFAULT_NAME "'.\n"
    " -byVar Print each var and val, then all objects that match, as\n"
    " opposed to printing objects and all the var=val pairs that match.\n"
    " -ra Default. Print each obj with set of indented var val pairs on\n"
    " separate lines and objects as a stanzas (-byVar prints pseudo-RA).\n"
    " -line Print each obj and all var=val pairs on a single line.\n"
    " -count Just print count of objects, variables and values selected.\n"
    // The newline below was missing, which glued the -validate help onto the -cv line.
    " -cv Specify CV file path (e.g. from sandbox)\n"
    " -validate Validate mdb objects against cv.ra. (Incompatible with -byVars, -ra, -line.)\n"
    " -validateFull like validate but considers vars not defined in cv as invalid.\n"
    " -experimentify Groups objs into experiments defined in encodeExp table.\n"
    " -encodeExp={table} Optionally tell which encodeExp table to use.\n"
    " -specialHelp Prints help for some special case features.\n"
    " Four alternate ways to select metadata:\n"
    " -all Will print entire table (this could be huge).\n"
    " -vars={var=val...} Request a combination of var=val pairs.\n\n"
    " Use: 'var=val' 'var=v%%' 'var=' 'var=val1,val2' (val1 or val2).\n"
    " 'var!=val' 'var!=v%%' 'var!=' 'var!=val1,val2' are all supported.\n"
    " -obj={objName} Request a single object. Can be narrowed by var and val.\n"
    " -var={varName} Request a single variable. Can be narrowed by val.\n"
    "There are two basic views of the data: by objects and by variables. The default view "
    "is by object. Each object will print out in an RA style stanza (by default) or as "
    "a single line of output containing all var=val pairs. In 'byVar' view, each RA style "
    "stanza holds a var val pair and all objects belonging to that pair on separate lines. "
    "Linear 'byVar' view puts the entire var=val pair on one line. Alternatively, request "
    "only counting of objects, variables or values.\n"
    "HINT: Use '%%' in any obj, var or val as a wildcard for selection.\n\n"
    "Examples:\n"
    " mdbPrint hg19 -vars=\"grant=Snyder cell=GM12878 antibody=CTCF\"\n"
    " Return all objs that satify ALL of the constraints.\n"
    " mdbPrint mm9 -vars=\"grant=Snyder cell=GM12878 antibody=?\" -byVar\n"
    " Return each all vars for all objects with the constraint.\n"
    " mdbPrint hg18 -obj=wgEncodeUncFAIREseqPeaksPanislets -line\n"
    " Return a single formatted metadata line for one object.\n"
    " mdbPrint hg18 -count -var=cell -val=GM%%\n"
    " Return the count of objects which have a cell begining with 'GM'.\n");
}

/* Command line options accepted by optionInit(). */
static struct optionSpec optionSpecs[] = {
    {"table",        OPTION_STRING},  // default "metaDb"
    {"ra",           OPTION_BOOLEAN}, // ra format
    {"line",         OPTION_BOOLEAN}, // linear format
    {"composite",    OPTION_STRING},  // Special case of a common var
                                      // (replaces vars="composite=wgEncodeBroadHistone")
    {"count",        OPTION_BOOLEAN}, // returns only counts of objects, vars and vals
    {"counts",       OPTION_BOOLEAN}, // sames as count
    {"cv",           OPTION_STRING},  // specify CV file path
    {"all",          OPTION_BOOLEAN}, // query entire table
    {"byVar",        OPTION_BOOLEAN}, // With -all prints from var perspective
    {"specialHelp",  OPTION_BOOLEAN}, // Certain very specialized features are described
    {"obj",          OPTION_STRING},  // objName or objId
    {"var",          OPTION_STRING},  // variable
    {"val",          OPTION_STRING},  // value
    {"validate",     OPTION_BOOLEAN}, // Validate vars and vals against cv.ra terms
    {"validateFull", OPTION_BOOLEAN}, // Like validate but considers vars not in cv as invalid
    {"experimentify",OPTION_BOOLEAN}, // Validate Experiments as defined in hgFixed.encodeExp table
    {"encodeExp",    OPTION_STRING},  // Optionally tell which encodeExp to use
    {"vars",         OPTION_STRING},  // var1=val1 var2=val2...
    {"or",           OPTION_STRING},  // or var1=val1 var2=val2...
    {"order",        OPTION_STRING},  // comma delimited list of vars to order result by
    {"separator",    OPTION_STRING},  // Optional separator used with order
    {"header",       OPTION_BOOLEAN}, // Optional inclusion of header used with order
    {"updDb",        OPTION_STRING},  // DB to update
    {"updMdb",       OPTION_STRING},  // MDB table to update
    {"updSelect",    OPTION_STRING},  // Experiment defining vars: "var1,var2"
    {"updVars",      OPTION_STRING},  // Vars to update: "var1,var2"
    {NULL, 0}
};

void specialHelp()
/* Explain the specialty selectors/outputs and exit (errAbort does not return). */
{
errAbort(
    "mdbPrint - Extra help for specialty selectors and output for metadata.\n"
    "usage:\n"
    " mdbPrint {db} -composite={} -vars=\"var1=val1 || var1=val2...\" [-or=\"var3=val3...\" ]\n"
    " -order={var1,var2,...} [-separator={\"any string\"} [-header]\n"
    " -updDB={db} -updMdb={metaDb} -updSelect=var1,var2,... -updVars=varA,varB,...\n"
    "Options:\n"
    " {db} Database to query metadata from. This argument is required.\n"
    " -composite={} Special commonly used var=val pair replaces -vars=\"composite=wgEn...\".\n"
    " -vars={var=val...} Request a combination of var=val pairs.\n"
    " Use: 'var!=val', 'var=v%%', 'var=v1,v2' (same as 'var=v1 || var=v2').\n"
    " -or={var=val...} When selecting by -obj or -vars can add orthoganal 'or' condition.\n"
    " Used for more complex selections. Note -composite will be common to both queries.\n"
    "Special functions:\n"
    " Print ordered vars:\n"
    " -order Prints only vars named and in the order named (comma delimited).\n"
    " -separator Optional, examples: \" \", \"\\t\\t\", \"\" (will make html table).\n"
    " -header Optionally include header.\n"
    " Print mdbUpdate lines to assist importing metadata from one db.table to another:\n"
    " -updDb Database to aim mdbUpdate lines at.\n"
    " -updMdb The metaDb table to aim mdbUpdate lines at.\n"
    " -updSelect A comma separated list of variables that will be selected with\n"
    " the mdbUpdate (via '-vars').\n"
    " -updVars A comma separated list of variables that will be set in the\n"
    " mdbUpdate lines (via '-setVars').\n"
    "The purpose of this special option is to generate mdbUpdate commands from existing metadata.\n"
    "Examples:\n"
    " mdbPrint hg18 -vars=\"composite=wgEncodeYaleChIPseq\" -updDb=hg19 -updMdb=metaDb_braney\n"
    " (cont.) -updSelect=grant,cell,antibody -updVars=dateSubmitted,dateUnrestricted\n"
    " This command assists importing dateSubmitted from hg18 to hg19 for all\n"
    " objects in hg19 that match the grant, cell and antibody of\n"
    " objects in hg18. It would result in output that looks something like:\n"
    " mdbUpdate hg19 -table=metaDb_cricket -vars=\"grant=Snyder cell=GM12878 antibody=c-Fos\"\n"
    " (cont.) -setVars=\"dateSubmitted=2009-02-13 dateUnrestricted=2009-11-12\" -test\n"
    " mdbUpdate hg19 -table=metaDb_braney -vars=\"grant=Snyder cell=GM12878 antibody=c-Jun\"\n"
    " (cont.) -setVars=\"dateSubmitted=2009-01-08 dateUnrestricted=2009-08-07\" -test\n"
    " mdbUpdate hg19 ...\n"
    " Note the '-test' in output to allow confirmation of effects before actual update.\n"
    " mdbPrint hg18 -vars=\"composite=wgEncodeYaleChIPseq view=RawSignal\" -updDb=hg18\n"
    " (cont.) -updMdb=metaDb_vsmalladi -updSelect=obj -updVars=fileName\n"
    " You can select by object too (but not in combination with other vars).\n"
    " This example shows how to assist updating vals to the same mdb, where an editor\n"
    " or awk will also be needed.\n");
}

static void mdbObjPrintUpdateLines(struct mdbObj **mdbObjs,char *dbToUpdate, char *tableToUpdate,
                                   char *varsToSelect,char *varsToUpdate)
// Prints one 'mdbUpdate' command line per distinct selection to stdout, to allow taking vars
// from one db or table to another.
//   mdbObjs       - address of the object list; the list is re-sorted here when selecting by
//                   vars, which is why a pointer to the head is required.
//   dbToUpdate    - database named in the emitted mdbUpdate commands.
//   tableToUpdate - table named (as -table=) in the emitted mdbUpdate commands.
//   varsToSelect  - "obj", or a comma delimited list of var names used to build -vars="...".
//                   NOTE: chopped in place (commas are overwritten).
//   varsToUpdate  - comma delimited list of var names used to build -setVars="...", or the
//                   single item "expId={startingId}" to emit incrementing expIds.
//                   NOTE: chopped in place.
// Aborts via errAbort if any parameter is NULL or a var list is empty.
{
if (dbToUpdate == NULL || tableToUpdate == NULL || varsToSelect == NULL || varsToUpdate == NULL)
    errAbort("mdbObjPrintUpdateLines is missing important parameter.\n");

// Decide the selection mode once, up front.  chopByChar below modifies varsToSelect in place,
// so re-testing the string later could misclassify a list such as "obj,cell" as plain "obj".
boolean selectByObj = sameWord(varsToSelect,"obj");

// Parse variables that will be used to select mdb objects
// varsToSelect is comma delimited string of var names.  Vals are discovered in each obj
int selCount = 0;
char **selVars = NULL;
if (!selectByObj)
    {
    // Sort objs to avoid duplicate mdbUpdate statements
    mdbObjsSortOnVars(mdbObjs, varsToSelect);

    // Parse list of selecting vars (could be simply expId or expId,replicate,view)
    selCount = chopByChar(varsToSelect,',',NULL,0);
    if (selCount <= 0)
        errAbort("mdbObjPrintUpdateLines is missing experiment defining variables.\n");
    selVars = needMem(sizeof(char *) * selCount);
    selCount = chopByChar(varsToSelect,',',selVars,selCount);
    }

// Parse variables that will be updated in selected mdb objects
// varsToUpdate is comma delimited string of var names.  Vals are discovered in each obj
int updCount = chopByChar(varsToUpdate,',',NULL,0);
if (updCount <= 0)
    errAbort("mdbObjPrintUpdateLines is missing variables to set.\n");
char **updVars = needMem(sizeof(char *) * updCount);
updCount = chopByChar(varsToUpdate,',',updVars,updCount);

int ix = 0;

// Special case when varsToUpdate contains ONLY expId={startingId}
boolean updExpId = (updCount == 1 && startsWithWordByDelimiter("expId",'=',updVars[0]));
int startingId = 0;
if (updExpId)
    {
    startingId = sqlSigned(skipBeyondDelimit(updVars[0],'='));
    updVars[0][strlen("expId")] = '\0';  // Trim "={startingId}" leaving just the var name
    }

// For each passed in obj, write an mdbUpdate statement
struct mdbObj *mdbObj = NULL;
struct dyString *thisSelection = newDyString(256);
struct dyString *lastSelection = newDyString(256);
for (mdbObj = *mdbObjs; mdbObj != NULL; mdbObj = mdbObj->next)
    {
    if (mdbObj->obj == NULL || mdbObj->deleteThis)
        continue;

    // Build this selection string e.g. -vars="cell=GM23878 antibody=CTCF"
    dyStringClear(thisSelection);
    if (selectByObj)
        {
        dyStringPrintf(thisSelection,"-obj=%s",mdbObj->obj);
        }
    else
        {
        dyStringAppend(thisSelection,"-vars=\"");
        for (ix = 0; ix < selCount; ix++)
            {
            char *val = mdbObjFindValue(mdbObj,selVars[ix]);
            if (val != NULL) // TODO what to do for NULLS?
                {
                if (strchr(val, ' ') != NULL) // Has blanks
                    // FIXME: Need to make single quotes work since already within double quotes!
                    dyStringPrintf(thisSelection,"%s='%s' ",selVars[ix],val);
                else
                    dyStringPrintf(thisSelection,"%s=%s ",selVars[ix],val);
                }
            }
        dyStringAppend(thisSelection,"\"");
        }

    // Don't bother making another mdbUpdate line if selection is the same.
    // (Objects were sorted on the selecting vars above, so duplicates are adjacent.)
    if (dyStringLen(lastSelection) > 0
    &&  sameString(dyStringContents(lastSelection),dyStringContents(thisSelection)))
        continue;
    dyStringClear(lastSelection);
    dyStringAppend(lastSelection,dyStringContents(thisSelection));

    // The table is passed as the '-table=' option, matching the example in specialHelp().
    printf("mdbUpdate %s -table=%s %s",dbToUpdate,tableToUpdate,
           dyStringContents(thisSelection));

    // build the update string e.g. -setVars="dateSubmitted=2009-09-14 dateUnrestricted=2010-06-13"
    printf(" -setVars=\"");
    for (ix = 0; ix < updCount; ix++)
        {
        if (updExpId)
            printf("expId=%d",startingId++); // Special case expId is incrementing
        else
            {
            char *val = mdbObjFindValue(mdbObj,updVars[ix]);
            if (val != NULL) // What to do for NULLS?  Ignore
                {
                printf("%s=",updVars[ix]);
                if (strchr(val, ' ') != NULL) // Has blanks
                    printf("'%s' ",val);
                else
                    printf("%s ",val);
                }
            }
        }
    printf("\" -test\n"); // Always test first
    }

dyStringFree(&thisSelection);
dyStringFree(&lastSelection);

// Release the pointer arrays; the strings they point into belong to the caller.
if (selVars != NULL)
    freeMem(selVars);
freeMem(updVars);
}

static int sortCompositeFirst(const void *va, const void *vb)
/* Sort comparator that promotes composite objects to the head of an mdb object list.
 * va and vb each point at a 'struct mdbObj *' element.  Returns -1 when only a is
 * composite, 1 when only b is composite and 0 when they tie, so the ordering is
 * consistent regardless of argument order. */
{
struct mdbObj *a = *((struct mdbObj **)va);
struct mdbObj *b = *((struct mdbObj **)vb);
boolean aIsComposite = mdbObjIsComposite(a);
boolean bIsComposite = mdbObjIsComposite(b);
if (aIsComposite && !bIsComposite)
    return -1;
if (bIsComposite && !aIsComposite)
    return 1;
return 0;
}

int main(int argc, char *argv[])
// Process command line, run the requested metadata query and print the result.
// Exit code: 0 on success, 1 when an object-view query matched nothing, 2 when -experimentify
// found objects whose experiment IDs can not be updated now, 3 when validation found
// invalid variables.
{
struct mdbObj   * mdbObjs   = NULL;
struct mdbByVar * mdbByVars = NULL;
int objsCnt = 0, varsCnt = 0, valsCnt = 0;
int retCode = 0;

if (argc == 1)
    usage();

optionInit(&argc, argv, optionSpecs);

if (optionExists("specialHelp"))
    specialHelp();

if (argc < 2)
    {
    verbose(1, "REQUIRED 'DB' argument not found:\n");
    usage();
    }

char *db     = argv[1];
char *table  = optionVal("table",NULL);
char *order  = optionVal("order",NULL);
char *orVars = NULL;
char *cv     = optionVal("cv",NULL);
if (cv != NULL)
    cvFileDeclare(cv);

// RA style is the default; -line switches it off unless -ra is also given.
boolean raStyle = TRUE;
if (optionExists("line") && !optionExists("ra"))
    raStyle = FALSE;

boolean justCounts = (optionExists("count") || optionExists("counts"));
boolean byVar      = optionExists("byVar");
boolean validate   = (optionExists("validate") || optionExists("validateFull"));

// encodeExp is non-NULL only when -experimentify was requested.
char *encodeExp = NULL;
if (optionExists("experimentify"))
    {
    encodeExp = optionVal("encodeExp","encodeExp");
    if (strlen(encodeExp) == 0 || sameWord("std",encodeExp))
        encodeExp = "encodeExp";
    }
else if (optionExists("encodeExp"))
    errAbort("-encodeExp option requires -experimentify option.\n");

if ((validate || encodeExp != NULL)
&&  (byVar || optionExists("line") || optionExists("ra") || optionExists("order")))
    {
    verbose(1, "Incompatible to combine validate or experimentify option with "
               "'byVar', 'line', 'ra' or 'order':\n");
    usage();
    }

// Work out which of the alternate selection methods was requested.
boolean all = optionExists("all");
if (all)
    {
    if (optionExists("obj") || optionExists("var") || optionExists("val")
    ||  optionExists("vars"))
        usage();
    }
else if (optionExists("obj"))
    {
    mdbObjs = mdbObjCreate(optionVal("obj", NULL),optionVal("var", NULL),
                           optionVal("val", NULL));
    }
else if (optionExists("vars"))
    {
    char *vars = optionVal("vars", NULL);
    // Split off anything after the first " || "; the remainder is handled as 'or' queries.
    orVars = strstr(vars," || ");
    if (orVars != NULL)
        {
        *orVars = '\0';
        orVars += 4;
        }
    mdbByVars = mdbByVarsLineParse(vars);
    if (optionExists("composite"))
        mdbByVarAppend(mdbByVars,"composite", optionVal("composite", NULL),FALSE);
    if (optionExists("var"))
        mdbByVarAppend(mdbByVars,optionVal("var", NULL), optionVal("val", NULL),FALSE);
    }
else if (optionExists("composite"))
    {
    mdbByVars = mdbByVarCreate("composite", optionVal("composite", NULL));
    if (optionExists("var"))
        mdbByVarAppend(mdbByVars,optionVal("var", NULL), optionVal("val", NULL),FALSE);
    }
else if (optionExists("var"))
    {
    mdbByVars = mdbByVarCreate(optionVal("var", NULL),optionVal("val", NULL));
    if (optionExists("composite"))
        mdbByVarAppend(mdbByVars,"composite", optionVal("composite", NULL),FALSE);
    }
else
    usage();

if (optionExists("or") || orVars != NULL)
    {
    if (byVar)
        errAbort("Unsupported to use -or with -byVar.\n");

    if (optionExists("or"))
        {
        if (!optionExists("vars") && !optionExists("obj"))
            errAbort("Incompatible to use -or without -vars or -obj'.\n");
        if (orVars != NULL)
            errAbort("Incompatible to use -or and ' || ' in -vars.\n");
        else
            orVars = optionVal("or", NULL);
        }
    }

struct sqlConnection *conn = sqlConnect(db);

// Find the table if necessary
if (table == NULL)
    {
    table = mdbTableName(conn,TRUE); // Look for sandBox name first
    if (table == NULL)
        errAbort("TABLE NOT FOUND: '%s.%s'.\n",db,MDB_DEFAULT_NAME);
    }
if (encodeExp != NULL)
    verbose(1, "Using tables named '%s.%s' and 'hgFixed.%s'.\n",db,table,encodeExp);
else
    verbose(1, "Using table named '%s.%s'.\n",db,table);

if (byVar)
    {
    if (!all && !validate && mdbByVars == NULL) // assertable
        usage();

    // Requested a single var
    struct mdbByVar * queryResults = mdbByVarsQuery(conn,table,mdbByVars);
    if (queryResults == NULL)
        verbose(1, "No metadata met your selection criteria\n");
    else
        {
        objsCnt = mdbByVarCount(queryResults,FALSE,FALSE);
        varsCnt = mdbByVarCount(queryResults,TRUE ,FALSE);
        valsCnt = mdbByVarCount(queryResults,FALSE,TRUE );
        if (!justCounts)
            mdbByVarPrint(queryResults,raStyle);
        mdbByVarsFree(&queryResults);
        }
    }
else
    {
    struct mdbObj * queryResults = NULL;
    if (mdbByVars != NULL)
        {
        // Requested a set of var=val pairs and looking for the
        // unique list of objects that have all of them!
        queryResults = mdbObjsQueryByVars(conn,table,mdbByVars);
        }
    else
        {
        // Requested a single obj
        queryResults = mdbObjQuery(conn,table,mdbObjs);
        }

    // Run each " || " separated alternative as its own query and merge into the results.
    boolean resort = FALSE;
    while (orVars != NULL)
        {
        char *vars = orVars;
        orVars = strstr(vars," || ");
        if (orVars != NULL)
            {
            *orVars = '\0';
            orVars += 4;
            }
        // NOTE(review): orByVars is not freed here; confirm the query results do not
        // reference it before adding an mdbByVarsFree().
        struct mdbByVar * orByVars = mdbByVarsLineParse(vars);
        if (optionExists("composite"))
            mdbByVarAppend(orByVars,"composite", optionVal("composite", NULL),FALSE);
        if (optionExists("var"))
            mdbByVarAppend(orByVars,optionVal("var", NULL), optionVal("val", NULL),FALSE);
        struct mdbObj * orResults = mdbObjsQueryByVars(conn,table,orByVars);
        if (orResults != NULL)
            {
            // Merge by removing dups from orResults and cating together.
            orResults = mdbObjIntersection(&orResults,queryResults);
            queryResults = slCat(queryResults,orResults);
            resort = TRUE;
            }
        }
    if (resort)
        slSort(&queryResults,&mdbObjCmp); // Need to be returned to obj order

    if (queryResults == NULL)
        {
        verbose(1, "No metadata met your selection criteria\n");
        retCode = 1;
        }
    else
        {
        objsCnt = mdbObjCount(queryResults,TRUE);
        varsCnt = mdbObjCount(queryResults,FALSE);
        valsCnt = varsCnt;
        if (!justCounts)
            {
            if (optionExists("updSelect")) // Special print of mdbUpdate lines
                {
                if (!optionExists("updDb") || !optionExists("updMdb")
                ||  !optionExists("updVars"))
                    errAbort("To print mdbUpdate lines, all the following values are needed: "
                             "'-updDb=' '-updMdb=' '-updSelect=' '-updVars='.\n");

                mdbObjPrintUpdateLines(&queryResults,optionVal("updDb",NULL),
                                       optionVal("updMdb",NULL),optionVal("updSelect",NULL),
                                       optionVal("updVars",NULL));
                }
            else if (encodeExp != NULL) // Organizes vars as experiments and validates expId values
                {
                struct mdbObj *updatable =
                    mdbObjsEncodeExperimentify(conn,db,table,encodeExp,
                                               &queryResults,2,FALSE,FALSE);
                                               // 2=full experiments described
                printf("%d of %d obj%s can have their experiment IDs updated now.\n",
                       slCount(updatable),objsCnt,(objsCnt==1?"":"s"));
                if (slCount(updatable) < objsCnt)
                    retCode = 2;
                mdbObjsFree(&updatable);
                }
            else if (validate) // Validate vars and vals against cv.ra
                {
                int invalids = mdbObjsValidate(queryResults,optionExists("validateFull"));
                verbose(1,"%d invalid%s of %d variable%s\n",invalids,(invalids==1?"":"s"),
                        varsCnt,(varsCnt==1?"":"s"));
                if (invalids > 0)
                    retCode = 3;
                }
            else if (order != NULL)
                {
                char *sep = optionVal("separator",NULL);
                boolean header = optionExists("header");
                mdbObjPrintOrderedToStream(stdout,&queryResults,order, sep, header);
                }
            else
                {
                // Default operation here
                if (optionExists("composite") && !mdbObjIsComposite(queryResults))
                    /* Pull composite metaObject to head of list for readability */
                    slSort(&queryResults, &sortCompositeFirst);
                mdbObjPrint(queryResults,raStyle);
                }
            }
        mdbObjsFree(&queryResults);
        }
    }
sqlDisconnect(&conn);

if (justCounts)
    {
    printf("%d object%s\n",objsCnt,(objsCnt==1?"":"s"));
    printf("%d variable%s\n",varsCnt,(varsCnt==1?"":"s"));
    printf("%d value%s\n",valsCnt,(valsCnt==1?"":"s"));
    }
else if ( varsCnt>0 || valsCnt>0 || objsCnt>0 )
    {
    if (byVar)
        verbose(1,"vars:%d vals:%d objects:%d\n",varsCnt,valsCnt,objsCnt);
    else
        verbose(1,"objects:%d vars:%d\n",objsCnt,varsCnt);
    }

if (mdbObjs)
    mdbObjsFree(&mdbObjs);
if (mdbByVars)
    mdbByVarsFree(&mdbByVars);

return retCode;
}