/* repMaskJobs - make file of RepeatMasker jobs for condor. */
#include "common.h"
#include "portable.h"

void usage()
/* Print usage instructions and exit. */
{
errAbort(
  "repMaskJobs - make Condor submission file for a bunch of\n"
  "repeat masker jobs.\n"
  "usage:\n"
  " repMaskJobs dir maxJobs clonesPerJob\n"
  "Dir should be something like ~/gs/fin or ~/gs/draft.\n"
  "This will create rmskNN.con files containing up to maxJobs\n"
  "jobs in this directory, which can be submitted to condor.\n"
  "The condor scripts will put .err and .log files in\n"
  "the rmsk.log subdirectory. The files in a .fa directory will be\n"
  "masked and .out files put there. A masked dir will also be\n"
  "created and filled with .fa.masked files\n");
}

static void joinPath(char *buf, size_t bufSize, char *dir, char *name)
/* Write "dir/name" into buf.  Aborts instead of overflowing or
 * silently truncating when the result does not fit in bufSize bytes. */
{
int len = snprintf(buf, bufSize, "%s/%s", dir, name);
if (len < 0 || (size_t)len >= bufSize)
    errAbort("Path too long: %s/%s", dir, name);
}

char *fullPathName(char *relName)
/* Return full version of path name, as a cloneString'd copy.
 * Names that start with '/' or '~' are copied unchanged; any other
 * name is prefixed with the current working directory. */
{
char firstChar = relName[0];
char fullPath[512];
char dir[512];

if (firstChar == '/' || firstChar == '~')
    return cloneString(relName);
/* getcwd returns NULL on failure and leaves dir unusable. */
if (getcwd(dir, sizeof(dir)) == NULL)
    errAbort("Couldn't get current directory");
joinPath(fullPath, sizeof(fullPath), dir, relName);
return cloneString(fullPath);
}

void repMaskJobs(char *rootDir, int maxJobCount, int batchSize)
/* Make condor submission files to repeat mask everything
 * in rootDir/fa.
 *   rootDir     - directory holding the fa subdirectory; rmskNN.con
 *                 files plus the rmsk.log and masked directories are
 *                 made here.
 *   maxJobCount - a new rmskNN.con file is started once this many
 *                 jobs have been written to the current one.
 *   batchSize   - number of .fa files passed as arguments to each
 *                 job.  Must be at least 1.
 * Aborts if there are no .fa files or if a path is too long. */
{
char faDir[512], logDir[512], maskDir[512];
char outName[512];
struct slName *faDirList, *faFile;
int jobCount = 0;
FILE *out = NULL;
int conCount = 0;

/* With a batch size below one the file list would never advance
 * and the loop below would write jobs forever. */
if (batchSize < 1)
    errAbort("clonesPerJob must be at least 1, got %d", batchSize);

rootDir = fullPathName(rootDir);

/* Make sub-directory names and directories. */
joinPath(faDir, sizeof(faDir), rootDir, "fa");
joinPath(logDir, sizeof(logDir), rootDir, "rmsk.log");
joinPath(maskDir, sizeof(maskDir), rootDir, "masked");
makeDir(logDir);
makeDir(maskDir);

/* Get list of all .fa files to mask. */
faDirList = listDir(faDir, "*.fa");
if (slCount(faDirList) < 1)
    errAbort("No .fa files in %s\n", faDir);

/* Write out stuff for each fa file.  The list pointer is advanced
 * inside the loop body, one batch at a time. */
for (faFile = faDirList; faFile != NULL; )
    {
    /* The first file of the batch names the job's .err and .log files. */
    char *fa = faFile->name;
    int i;

    /* Open output and write out header. */
    if (out == NULL)
        {
        int len = snprintf(outName, sizeof(outName), "%s/rmsk%02d.con",
                           rootDir, ++conCount);
        if (len < 0 || (size_t)len >= sizeof(outName))
            errAbort("Path too long: %s/rmsk%02d.con", rootDir, conCount);
        uglyf("New file %s\n", outName);
        out = mustOpen(outName, "w");
        fprintf(out, "# Condor submit file %d to repeat mask seq in %s.\n",
                conCount, faDir);
        fprintf(out, "# Generated by repMaskJobs\n\n");
        fprintf(out, "initialdir\t= %s\n", faDir);
        fprintf(out, "universe\t= vanilla\n");
        fprintf(out, "notification\t= error\n");
        fprintf(out, "requirements\t= memory > 120\n");
        fprintf(out, "executable\t= %s/rmsk.sh\n", rootDir);
        fprintf(out, "output\t= /dev/null\n");
        fprintf(out, "\n");
        }

    fprintf(out, "error\t= %s/%s.err\n", logDir, fa);
    fprintf(out, "log\t= %s/%s.log\n", logDir, fa);
    fprintf(out, "arguments\t=");
    /* Up to batchSize file names per job; the last job may get fewer. */
    for (i=0; i<batchSize && faFile != NULL; ++i)
        {
        fprintf(out, " %s", faFile->name);
        faFile = faFile->next;
        }
    fprintf(out, "\n");
    fprintf(out, "queue 1\n");
    fprintf(out, "\n");

    /* Current submit file is full - close it so the next job opens
     * a new one. */
    if (++jobCount >= maxJobCount)
        {
        carefulClose(&out);
        jobCount = 0;
        }
    }
carefulClose(&out);
}

int main(int argc, char *argv[])
/* Process command line. */
{
/* Cast to unsigned char: isdigit is undefined for negative char values. */
if (argc != 4 || !isdigit((unsigned char)argv[2][0])
    || !isdigit((unsigned char)argv[3][0]))
    usage();
repMaskJobs(argv[1], atoi(argv[2]), atoi(argv[3]));
return 0;
}