/* bedToBigBed - Convert bed to bigBed. */
#include "common.h"
#include "linefile.h"
#include "hash.h"
#include "options.h"
#include "dystring.h"
#include "obscure.h"
#include "asParse.h"
#include "basicBed.h"
#include "sig.h"
#include "rangeTree.h"
#include "zlibFace.h"
#include "sqlNum.h"
#include "bPlusTree.h"
#include "bigBed.h"

char *version = "2.5";

/* Things set directly or indirectly by command line in main() routine. */
int blockSize = 256;
int itemsPerSlot = 512;
char *extraIndex = NULL;
int bedN = 0;   /* number of standard bed fields */
int bedP = 0;   /* number of bed plus fields */
char *asFile = NULL;
char *asText = NULL;
static boolean doCompress = FALSE;
static boolean tabSep = FALSE;

void usage()
/* Explain usage and exit. */
{
errAbort(
  "bedToBigBed v. %s - Convert bed file to bigBed. (BigBed version: %d)\n"
  "usage:\n"
  "   bedToBigBed in.bed chrom.sizes out.bb\n"
  "Where in.bed is in one of the ascii bed formats, but not including track lines,\n"
  "chrom.sizes is two column: <chromosome name> <size in bases>,\n"
  "and out.bb is the output indexed big bed file.\n"
  "Use the script fetchChromSizes to obtain the actual chrom.sizes information\n"
  "from UCSC; please do not make up chrom sizes from your own information.\n"
  "The in.bed file must be sorted by chromosome,start.\n"
  "To sort a bed file, use the unix sort command:\n"
  "   sort -k1,1 -k2,2n unsorted.bed > sorted.bed\n"
  "\n"
  "options:\n"
  "   -type=bedN[+[P]] :\n"
  "           N is between 3 and 15,\n"
  "           optional (+) if extra \"bedPlus\" fields,\n"
  "           optional P specifies the number of extra fields. Not required, but preferred.\n"
  "           Examples: -type=bed6 or -type=bed6+ or -type=bed6+3\n"
  "           (see http://genome.ucsc.edu/FAQ/FAQformat.html#format1)\n"
  "   -as=fields.as - If you have non-standard \"bedPlus\" fields, it's great to put a definition\n"
  "           of each field in a row in AutoSql format here.\n"
  "   -blockSize=N - Number of items to bundle in r-tree.  Default %d\n"
  "   -itemsPerSlot=N - Number of data points bundled at lowest level. Default %d\n"
  "   -unc - If set, do not use compression.\n"
  "   -tab - If set, expect fields to be tab separated, normally\n"
  "           expects white space separator.\n"
  "   -extraIndex=fieldList - If set, make an index on each field in a comma separated list\n"
  "           extraIndex=name and extraIndex=name,id are commonly used.\n"
  , version, bbiCurrentVersion, blockSize, itemsPerSlot
  );
}

static struct optionSpec options[] = {
   {"blockSize", OPTION_INT},
   {"itemsPerSlot", OPTION_INT},
   {"type", OPTION_STRING},
   {"as", OPTION_STRING},
   {"unc", OPTION_BOOLEAN},
   {"tab", OPTION_BOOLEAN},
   {"extraIndex", OPTION_STRING},
   {NULL, 0},
};

int bbNamedFileChunkCmpByName(const void *va, const void *vb)
/* Compare two named offset objects to facilitate qsorting by name. */
{
const struct bbNamedFileChunk *a = va, *b = vb;
return strcmp(a->name, b->name);
}

static int maxBedNameSize;

void bbNamedFileChunkKey(const void *va, char *keyBuf)
/* Copy name to keyBuf for bPlusTree maker */
{
const struct bbNamedFileChunk *item = va;
strncpy(keyBuf, item->name, maxBedNameSize);
}

void *bbNamedFileChunkVal(const void *va)
/* Return pointer to val for bPlusTree maker. */
{
const struct bbNamedFileChunk *item = va;
return (void *)&item->offset;
}
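/* Note on the extra-index machinery (added description, inferred from the code
 * rather than from the original comments): a bbNamedFileChunk pairs an item
 * name with the file offset and size of the data block holding that item.
 * The two callbacks above feed bptFileBulkIndexToOpenFile, so a name lookup in
 * the finished file returns a fixed-size value of two bits64 words -- block
 * offset then block size -- which is why the index is later built with
 * valSize = sizeof(bits64) + sizeof(bits64). */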
void bbExIndexMakerAddKeysFromRow(struct bbExIndexMaker *eim, char **row, int recordIx)
/* Save the keys that are being indexed by row in eim. */
{
int i;
for (i=0; i < eim->indexCount; ++i)
    {
    int rowIx = eim->indexFields[i];
    eim->chunkArrayArray[i][recordIx].name = cloneString(row[rowIx]);
    }
}

void bbExIndexMakerAddOffsetSize(struct bbExIndexMaker *eim, bits64 offset, bits64 size,
    long startIx, long endIx)
/* Update offset and size fields of all file chunks between startIx and endIx */
{
int i;
for (i=0; i < eim->indexCount; ++i)
    {
    struct bbNamedFileChunk *chunks = eim->chunkArrayArray[i];
    long j;
    for (j = startIx; j < endIx; ++j)
        {
        struct bbNamedFileChunk *chunk = chunks + j;
        chunk->offset = offset;
        chunk->size = size;
        }
    }
}

static void writeBlocks(struct bbiChromUsage *usageList, struct lineFile *lf, struct asObject *as,
    int itemsPerSlot, struct bbiBoundsArray *bounds, int sectionCount, boolean doCompress, FILE *f,
    int resTryCount, int resScales[], int resSizes[],
    struct bbExIndexMaker *eim, int bedCount,
    bits16 fieldCount, bits32 *retMaxBlockSize)
/* Read through lf, writing it in f.  Save starting points of blocks (every itemsPerSlot)
 * to boundsArray */
{
int maxBlockSize = 0;
struct bbiChromUsage *usage = usageList;
char *line, *row[fieldCount+1];
int lastField = fieldCount-1;
int itemIx = 0, sectionIx = 0;
bits64 blockStartOffset = 0;
int startPos = 0, endPos = 0;
bits32 chromId = 0;
struct dyString *stream = dyStringNew(0);

/* Will keep track of some things that help us determine how much to reduce. */
bits32 resEnds[resTryCount];
int resTry;
for (resTry = 0; resTry < resTryCount; ++resTry)
    resEnds[resTry] = 0;
boolean atEnd = FALSE, sameChrom = FALSE;
bits32 start = 0, end = 0;
char *chrom = NULL;
struct bed *bed;
AllocVar(bed);

/* Help keep track of which beds are in current chunk so as to write out
 * namedChunks to eim if need be. */
long sectionStartIx = 0, sectionEndIx = 0;

for (;;)
    {
    /* Get next line of input if any. */
    if (lineFileNextReal(lf, &line))
        {
        /* Chop up line and make sure the word count is right. */
        int wordCount;
        if (tabSep)
            wordCount = chopTabs(line, row);
        else
            wordCount = chopLine(line, row);
        lineFileExpectWords(lf, fieldCount, wordCount);

        loadAndValidateBed(row, bedN, fieldCount, lf, bed, as, FALSE);

        chrom = bed->chrom;
        start = bed->chromStart;
        end = bed->chromEnd;
        sameChrom = sameString(chrom, usage->name);
        }
    else  /* No next line */
        {
        atEnd = TRUE;
        }

    /* Check conditions that would end block and save block info and advance to next if need be. */
    if (atEnd || !sameChrom || itemIx >= itemsPerSlot)
        {
        /* Save stream to file, compressing if need be. */
        if (stream->stringSize > maxBlockSize)
            maxBlockSize = stream->stringSize;
        if (doCompress)
            {
            size_t maxCompSize = zCompBufSize(stream->stringSize);

            // keep around an area of scratch memory
            static int compBufSize = 0;
            static char *compBuf = NULL;
            // check to see if buffer needed for compression is big enough
            if (compBufSize < maxCompSize)
                {
                // free up the old not-big-enough piece
                freez(&compBuf); // freez knows about NULL

                // get new scratch area
                compBufSize = maxCompSize;
                compBuf = needLargeMem(compBufSize);
                }
            int compSize = zCompress(stream->string, stream->stringSize, compBuf, maxCompSize);
            mustWrite(f, compBuf, compSize);
            }
        else
            mustWrite(f, stream->string, stream->stringSize);
        dyStringClear(stream);

        /* Save block offset and size for all named chunks in this section. */
        if (eim != NULL)
            {
            bits64 blockEndOffset = ftell(f);
            bbExIndexMakerAddOffsetSize(eim, blockStartOffset, blockEndOffset-blockStartOffset,
                sectionStartIx, sectionEndIx);
            sectionStartIx = sectionEndIx;
            }
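        /* (Added note, inferred from how bounds is consumed below: each
         * bbiBoundsArray entry records a block's starting file offset plus the
         * chromId/start/end range it covers; cirTreeFileBulkIndexToOpenFile
         * later turns that array into the r-tree index over the data blocks.) */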
        /* Save info on existing block. */
        struct bbiBoundsArray *b = &bounds[sectionIx];
        b->offset = blockStartOffset;
        b->range.chromIx = chromId;
        b->range.start = startPos;
        b->range.end = endPos;
        ++sectionIx;
        itemIx = 0;

        if (atEnd)
            break;
        }

    /* Advance to next chromosome if need be and get chromosome id. */
    if (!sameChrom)
        {
        usage = usage->next;
        assert(usage != NULL);
        assert(sameString(chrom, usage->name));
        for (resTry = 0; resTry < resTryCount; ++resTry)
            resEnds[resTry] = 0;
        }
    chromId = usage->id;

    /* At start of block we save a lot of info. */
    if (itemIx == 0)
        {
        blockStartOffset = ftell(f);
        startPos = start;
        endPos = end;
        }
    /* Otherwise just update end. */
    else
        {
        if (endPos < end)
            endPos = end;
        /* No need to update startPos since list is sorted. */
        }

    /* Save name into namedOffset if need be. */
    if (eim != NULL)
        {
        bbExIndexMakerAddKeysFromRow(eim, row, sectionEndIx);
        sectionEndIx += 1;
        }

    /* Write out data. */
    dyStringWriteOne(stream, chromId);
    dyStringWriteOne(stream, start);
    dyStringWriteOne(stream, end);
    if (fieldCount > 3)
        {
        int i;
        /* Write 3rd through next-to-last field, each followed by a tab separator. */
        for (i=3; i < lastField; ++i)
            {
            dyStringAppend(stream, row[i]);
            dyStringAppendC(stream, '\t');
            }
        /* Write last field with no trailing tab. */
        dyStringAppend(stream, row[lastField]);
        }
    dyStringAppendC(stream, 0);  /* Terminal zero after rest-of-line string. */

    itemIx += 1;

    /* Do zoom counting. */
    for (resTry = 0; resTry < resTryCount; ++resTry)
        {
        bits32 resEnd = resEnds[resTry];
        if (start >= resEnd)
            {
            resSizes[resTry] += 1;
            resEnds[resTry] = resEnd = start + resScales[resTry];
            }
        while (end > resEnd)
            {
            resSizes[resTry] += 1;
            resEnds[resTry] = resEnd = resEnd + resScales[resTry];
            }
        }
    }
assert(sectionIx == sectionCount);
freez(&bed);
*retMaxBlockSize = maxBlockSize;
}

struct rbTree *rangeTreeForBedChrom(struct lineFile *lf, char *chrom)
/* Read lines from bed file as long as they match chrom.  Return a rangeTree that
 * corresponds to the coverage. */
{
struct rbTree *tree = rangeTreeNew();
char *line;
while (lineFileNextReal(lf, &line))
    {
    if (!startsWithWord(chrom, line))
        {
        lineFileReuse(lf);
        break;
        }
    char *row[3];
    chopLine(line, row);
    unsigned start = sqlUnsigned(row[1]);
    unsigned end = sqlUnsigned(row[2]);
    rangeTreeAddToCoverageDepth(tree, start, end);
    }
return tree;
}

static struct bbiSummary *bedWriteReducedOnceReturnReducedTwice(struct bbiChromUsage *usageList,
    int fieldCount, struct lineFile *lf, bits32 initialReduction, bits32 initialReductionCount,
    int zoomIncrement, int blockSize, int itemsPerSlot, boolean doCompress,
    struct lm *lm, FILE *f, bits64 *retDataStart, bits64 *retIndexStart,
    struct bbiSummaryElement *totalSum)
/* Write out data reduced by factor of initialReduction.  Also calculate and keep in memory
 * next reduction level.  This is more work than some ways, but it keeps us from having to
 * keep the first reduction entirely in memory. */
{
struct bbiSummary *twiceReducedList = NULL;
bits32 doubleReductionSize = initialReduction * zoomIncrement;
struct bbiChromUsage *usage = usageList;
struct bbiBoundsArray *boundsArray, *boundsPt, *boundsEnd;
boundsPt = AllocArray(boundsArray, initialReductionCount);
boundsEnd = boundsPt + initialReductionCount;

*retDataStart = ftell(f);
writeOne(f, initialReductionCount);

/* This gets a little complicated I'm afraid.  The strategy is to:
 *   1) Build up a range tree that represents coverage depth on that chromosome
 *      This also has the nice side effect of getting rid of overlaps.
 *   2) Stream through the range tree, outputting the initial summary level and
 *      further reducing. */
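/* (Added worked example of the binning, an illustration only: with
 * initialReduction = 1000, a coverage range [250,2600) at depth 2 is folded
 * into summaries spanning [250,1250), [1250,2250), and [2250,3250) (the last
 * clipped to the chromosome end), each accumulating validCount, min, max,
 * sumData, and sumSquares for the bases it overlaps.  The splitting loop
 * below handles exactly this case of one range crossing several summaries.) */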
boolean firstTime = TRUE;
struct bbiSumOutStream *stream = bbiSumOutStreamOpen(itemsPerSlot, f, doCompress);
for (usage = usageList; usage != NULL; usage = usage->next)
    {
    struct bbiSummary oneSummary, *sum = NULL;
    struct rbTree *rangeTree = rangeTreeForBedChrom(lf, usage->name);
    struct range *range, *rangeList = rangeTreeList(rangeTree);
    for (range = rangeList; range != NULL; range = range->next)
        {
        /* Grab values we want from range. */
        double val = ptToInt(range->val);
        int start = range->start;
        int end = range->end;
        bits32 size = end - start;

        /* Add to total summary. */
        if (firstTime)
            {
            totalSum->validCount = size;
            totalSum->minVal = totalSum->maxVal = val;
            totalSum->sumData = val*size;
            totalSum->sumSquares = val*val*size;
            firstTime = FALSE;
            }
        else
            {
            totalSum->validCount += size;
            if (val < totalSum->minVal) totalSum->minVal = val;
            if (val > totalSum->maxVal) totalSum->maxVal = val;
            totalSum->sumData += val*size;
            totalSum->sumSquares += val*val*size;
            }

        /* If start past existing block then output it. */
        if (sum != NULL && sum->end <= start)
            {
            bbiOutputOneSummaryFurtherReduce(sum, &twiceReducedList, doubleReductionSize,
                &boundsPt, boundsEnd, usage->size, lm, stream);
            sum = NULL;
            }

        /* If don't have a summary we're working on now, make one. */
        if (sum == NULL)
            {
            oneSummary.chromId = usage->id;
            oneSummary.start = start;
            oneSummary.end = start + initialReduction;
            if (oneSummary.end > usage->size) oneSummary.end = usage->size;
            oneSummary.minVal = oneSummary.maxVal = val;
            oneSummary.sumData = oneSummary.sumSquares = 0.0;
            oneSummary.validCount = 0;
            sum = &oneSummary;
            }

        /* Deal with case where might have to split an item between multiple summaries.  This
         * loop handles all but the final affected summary in that case. */
        while (end > sum->end)
            {
            /* Fold in bits that overlap with existing summary and output. */
            int overlap = rangeIntersection(start, end, sum->start, sum->end);
            assert(overlap > 0);
            verbose(3, "Splitting size %d at %d, overlap %d\n", end - start, sum->end, overlap);
            sum->validCount += overlap;
            if (sum->minVal > val) sum->minVal = val;
            if (sum->maxVal < val) sum->maxVal = val;
            sum->sumData += val * overlap;
            sum->sumSquares += val*val * overlap;
            bbiOutputOneSummaryFurtherReduce(sum, &twiceReducedList, doubleReductionSize,
                &boundsPt, boundsEnd, usage->size, lm, stream);
            size -= overlap;

            /* Move summary to next part. */
            sum->start = start = sum->end;
            sum->end = start + initialReduction;
            if (sum->end > usage->size) sum->end = usage->size;
            sum->minVal = sum->maxVal = val;
            sum->sumData = sum->sumSquares = 0.0;
            sum->validCount = 0;
            }

        /* Add to summary. */
        sum->validCount += size;
        if (sum->minVal > val) sum->minVal = val;
        if (sum->maxVal < val) sum->maxVal = val;
        sum->sumData += val * size;
        sum->sumSquares += val*val * size;
        }
    if (sum != NULL)
        {
        bbiOutputOneSummaryFurtherReduce(sum, &twiceReducedList, doubleReductionSize,
            &boundsPt, boundsEnd, usage->size, lm, stream);
        }
    rangeTreeFree(&rangeTree);
    }
bbiSumOutStreamClose(&stream);

/* Write out 1st zoom index. */
bits64 indexOffset = *retIndexStart = ftell(f);
assert(boundsPt == boundsEnd);
cirTreeFileBulkIndexToOpenFile(boundsArray, sizeof(boundsArray[0]), initialReductionCount,
    blockSize, itemsPerSlot, NULL, bbiBoundsArrayFetchKey, bbiBoundsArrayFetchOffset,
    indexOffset, f);

freez(&boundsArray);
slReverse(&twiceReducedList);
return twiceReducedList;
}
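/* (Added description of bbExIndexMaker, inferred from how its fields are used:
 * for each extra index i, indexFields[i] is the column number being indexed,
 * maxFieldSize[i] the widest value seen in that column, chunkArrayArray[i] an
 * array of one bbNamedFileChunk per bed record, and fileOffsets[i] the spot
 * where that index's b+ tree gets written in the output file.) */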
struct bbExIndexMaker *bbExIndexMakerNew(struct slName *extraIndexList, struct asObject *as)
/* Return an index maker corresponding to extraIndexList.  Checks that all fields
 * mentioned are in autoSql definition, and for now that they are all text fields. */
{
/* Fill in scalar fields and return quickly if no extra indexes. */
struct bbExIndexMaker *eim;
AllocVar(eim);
eim->indexCount = slCount(extraIndexList);
if (eim->indexCount == 0)
    return eim; // Not much to do in this case

/* Allocate arrays according to field count. */
AllocArray(eim->indexFields, eim->indexCount);
AllocArray(eim->maxFieldSize, eim->indexCount);
AllocArray(eim->chunkArrayArray, eim->indexCount);
AllocArray(eim->fileOffsets, eim->indexCount);

/* Loop through each field checking that it is indeed something we can index
 * and if so saving information about it */
int indexIx = 0;
struct slName *name;
for (name = extraIndexList; name != NULL; name = name->next)
    {
    struct asColumn *col = asColumnFind(as, name->name);
    if (col == NULL)
        errAbort("extraIndex field %s not a standard bed field or found in 'as' file.",
            name->name);
    if (!sameString(col->lowType->name, "string"))
        errAbort("Sorry for now can only index string fields.");
    eim->indexFields[indexIx] = slIxFromElement(as->columnList, col);
    ++indexIx;
    }
return eim;
}

void bbExIndexMakerAllocChunkArrays(struct bbExIndexMaker *eim, int recordCount)
/* Allocate the big part of the extra index maker - the part that holds which
 * chunk is used for each record. */
{
eim->recordCount = recordCount;
int i;
for (i=0; i < eim->indexCount; ++i)
    AllocArray(eim->chunkArrayArray[i], recordCount);
}

void bbExIndexMakerFree(struct bbExIndexMaker **pEim)
/* Free up memory associated with bbExIndexMaker */
{
struct bbExIndexMaker *eim = *pEim;
if (eim != NULL)
    {
    if (eim->chunkArrayArray != NULL)
        {
        int i;
        for (i=0; i < eim->indexCount; ++i)
            freeMem(eim->chunkArrayArray[i]);
        }
    freeMem(eim->indexFields);
    freeMem(eim->maxFieldSize);
    freeMem(eim->chunkArrayArray);
    freeMem(eim->fileOffsets);
    freez(pEim);
    }
}

void bbFileCreate(
    char *inName,     /* Input file in a tabular bed format <chrom><start><end> + whatever. */
    char *chromSizes, /* Two column tab-separated file: <chromosome> <size>. */
    int blockSize,    /* Number of items to bundle in r-tree.  1024 is good. */
    int itemsPerSlot, /* Number of items in lowest level of tree.  64 is good. */
    char *asText,     /* Field definitions in a string */
    struct asObject *as,  /* Field definitions parsed out */
    boolean doCompress,   /* If TRUE then compress data. */
    struct slName *extraIndexList, /* List of extra indexes to add */
    char *outName)    /* BigBed output file name. */
/* Convert tab-separated bed file to binary indexed, zoomed bigBed version. */
{
/* Set up timing measures. */
verboseTimeInit();
struct lineFile *lf = lineFileOpen(inName, TRUE);

bits16 fieldCount = slCount(as->columnList);
bits16 extraIndexCount = slCount(extraIndexList);

struct bbExIndexMaker *eim = NULL;
if (extraIndexList != NULL)
    eim = bbExIndexMakerNew(extraIndexList, as);

/* Load in chromosome sizes. */
struct hash *chromSizesHash = bbiChromSizesFromFile(chromSizes);
verbose(2, "Read %d chromosomes and sizes from %s\n", chromSizesHash->elCount, chromSizes);

/* Do first pass, mostly just scanning file and counting hits per chromosome. */
int minDiff = 0;
double aveSize = 0;
bits64 bedCount = 0;
bits32 uncompressBufSize = 0;
struct bbiChromUsage *usageList = bbiChromUsageFromBedFile(lf, chromSizesHash, eim,
    &minDiff, &aveSize, &bedCount);
verboseTime(1, "pass1 - making usageList (%d chroms)", slCount(usageList));
verbose(2, "%d chroms in %s. Average span of beds %f\n", slCount(usageList), inName, aveSize);
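/* (Added layout sketch summarizing the writes below: the output is a fixed
 * header, zoom headers, autoSql text, total summary, extended header, optional
 * extra-index list, chromosome b+ tree, data blocks, r-tree data index, a
 * data+index pair per zoom level, optional extra-index b+ trees, and the
 * bigBed magic number repeated at the end.  Header, total summary, and
 * extended header go out first as zero-filled placeholders and are rewritten
 * near the end of this routine once their real values are known.) */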
/* Open output file and write dummy header. */
FILE *f = mustOpen(outName, "wb");
bbiWriteDummyHeader(f);
bbiWriteDummyZooms(f);

/* Write out autoSql string */
bits64 asOffset = ftell(f);
mustWrite(f, asText, strlen(asText) + 1);
verbose(2, "as definition has %d columns\n", fieldCount);

/* Write out dummy total summary. */
struct bbiSummaryElement totalSum;
ZeroVar(&totalSum);
bits64 totalSummaryOffset = ftell(f);
bbiSummaryElementWrite(f, &totalSum);

/* Write out dummy header extension */
bits64 extHeaderOffset = ftell(f);
bits16 extHeaderSize = 64;
repeatCharOut(f, 0, extHeaderSize);

/* Write out extra index stuff if need be. */
bits64 extraIndexListOffset = 0;
bits64 extraIndexListEndOffset = 0;
if (extraIndexList != NULL)
    {
    extraIndexListOffset = ftell(f);
    int extraIndexSize = 16 + 4*1;  // Fixed record size 16, plus 1 times field size of 4
    repeatCharOut(f, 0, extraIndexSize*extraIndexCount);
    extraIndexListEndOffset = ftell(f);
    }

/* Write out chromosome/size database. */
bits64 chromTreeOffset = ftell(f);
bbiWriteChromInfo(usageList, blockSize, f);

/* Set up to keep track of possible initial reduction levels. */
int resScales[bbiMaxZoomLevels], resSizes[bbiMaxZoomLevels];
int resTryCount = bbiCalcResScalesAndSizes(aveSize, resScales, resSizes);

/* Write out primary full resolution data in sections, collect stats to use for reductions. */
bits64 dataOffset = ftell(f);
writeOne(f, bedCount);
bits32 blockCount = bbiCountSectionsNeeded(usageList, itemsPerSlot);
struct bbiBoundsArray *boundsArray;
AllocArray(boundsArray, blockCount);
lineFileRewind(lf);
bits32 maxBlockSize = 0;
if (eim)
    bbExIndexMakerAllocChunkArrays(eim, bedCount);
writeBlocks(usageList, lf, as, itemsPerSlot, boundsArray, blockCount, doCompress, f,
    resTryCount, resScales, resSizes, eim, bedCount, fieldCount, &maxBlockSize);
verboseTime(1, "pass2 - checking and writing primary data (%lld records, %d fields)",
    (long long)bedCount, fieldCount);

/* Write out primary data index. */
bits64 indexOffset = ftell(f);
cirTreeFileBulkIndexToOpenFile(boundsArray, sizeof(boundsArray[0]), blockCount,
    blockSize, 1, NULL, bbiBoundsArrayFetchKey, bbiBoundsArrayFetchOffset,
    indexOffset, f);
freez(&boundsArray);
verboseTime(2, "index write");

/* Declare arrays and vars that track the zoom levels we actually output. */
bits32 zoomAmounts[bbiMaxZoomLevels];
bits64 zoomDataOffsets[bbiMaxZoomLevels];
bits64 zoomIndexOffsets[bbiMaxZoomLevels];

/* Call monster zoom maker library function that bedGraphToBigWig also uses. */
int zoomLevels = bbiWriteZoomLevels(lf, f, blockSize, itemsPerSlot,
    bedWriteReducedOnceReturnReducedTwice, fieldCount,
    doCompress, indexOffset - dataOffset,
    usageList, resTryCount, resScales, resSizes,
    zoomAmounts, zoomDataOffsets, zoomIndexOffsets, &totalSum);

/* Write out extra indexes if need be. */
if (eim)
    {
    int i;
    for (i=0; i < eim->indexCount; ++i)
        {
        eim->fileOffsets[i] = ftell(f);
        maxBedNameSize = eim->maxFieldSize[i];
        // b+ tree bulk builder expects items already sorted by key
        qsort(eim->chunkArrayArray[i], bedCount, sizeof(struct bbNamedFileChunk),
            bbNamedFileChunkCmpByName);
        assert(sizeof(struct bbNamedFileChunk) == sizeof(eim->chunkArrayArray[i][0]));
        bptFileBulkIndexToOpenFile(eim->chunkArrayArray[i], sizeof(eim->chunkArrayArray[i][0]),
            bedCount, blockSize, bbNamedFileChunkKey, maxBedNameSize,
            bbNamedFileChunkVal, sizeof(bits64) + sizeof(bits64), f);
        verboseTime(1, "Sorting and writing extra index %d", i);
        }
    }

/* Figure out buffer size needed for uncompression if need be. */
if (doCompress)
    {
    int maxZoomUncompSize = itemsPerSlot * sizeof(struct bbiSummaryOnDisk);
    uncompressBufSize = max(maxBlockSize, maxZoomUncompSize);
    }
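/* (Added note on the calculation above: uncompressBufSize is stored in the
 * header so readers can allocate a single decompression buffer up front; it
 * must cover both the largest uncompressed data block and the largest possible
 * zoom block, hence the max() of the two.) */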
/* Go back and rewrite header. */
rewind(f);
bits32 sig = bigBedSig;
bits16 version = bbiCurrentVersion;
bits16 summaryCount = zoomLevels;
bits32 reserved32 = 0;
bits64 reserved64 = 0;
bits16 definedFieldCount = bedN;

/* Write fixed header */
writeOne(f, sig);
writeOne(f, version);
writeOne(f, summaryCount);
writeOne(f, chromTreeOffset);
writeOne(f, dataOffset);
writeOne(f, indexOffset);
writeOne(f, fieldCount);
writeOne(f, definedFieldCount);
writeOne(f, asOffset);
writeOne(f, totalSummaryOffset);
writeOne(f, uncompressBufSize);
writeOne(f, extHeaderOffset);
assert(ftell(f) == 64);

/* Write summary headers with data. */
int i;
verbose(2, "Writing %d levels of zoom\n", zoomLevels);
for (i=0; i < zoomLevels; ++i)
    {
    writeOne(f, zoomAmounts[i]);
    writeOne(f, reserved32);
    writeOne(f, zoomDataOffsets[i]);
    writeOne(f, zoomIndexOffsets[i]);
    }
/* Write rest of summary headers with no data. */
for (i=zoomLevels; i < bbiMaxZoomLevels; ++i)
    {
    writeOne(f, reserved32);
    writeOne(f, reserved32);
    writeOne(f, reserved64);
    writeOne(f, reserved64);
    }

/* Write total summary. */
fseek(f, totalSummaryOffset, SEEK_SET);
bbiSummaryElementWrite(f, &totalSum);

/* Write extended header */
fseek(f, extHeaderOffset, SEEK_SET);
writeOne(f, extHeaderSize);
writeOne(f, extraIndexCount);
writeOne(f, extraIndexListOffset);
repeatCharOut(f, 0, 52);    // reserved
assert(ftell(f) - extHeaderOffset == extHeaderSize);

/* Write extra index offsets if need be. */
if (extraIndexCount != 0)
    {
    fseek(f, extraIndexListOffset, SEEK_SET);
    for (i=0; i < extraIndexCount; ++i)
        {
        // Write out fixed part of index info
        bits16 type = 0;    // bPlusTree type
        bits16 indexFieldCount = 1;
        writeOne(f, type);
        writeOne(f, indexFieldCount);
        writeOne(f, eim->fileOffsets[i]);
        repeatCharOut(f, 0, 4);  // reserved

        // Write out field list - easy this time because for now always only one field.
        bits16 fieldId = eim->indexFields[i];
        writeOne(f, fieldId);
        repeatCharOut(f, 0, 2);  // reserved
        }
    assert(ftell(f) == extraIndexListEndOffset);
    }

/* Write end signature. */
fseek(f, 0L, SEEK_END);
writeOne(f, sig);

/* Clean up. */
lineFileClose(&lf);
carefulClose(&f);
freeHash(&chromSizesHash);
bbiChromUsageFreeList(&usageList);
asObjectFreeList(&as);
}

void bedToBigBed(char *inName, char *chromSizes, char *outName)
/* bedToBigBed - Convert bed file to bigBed. */
{
struct slName *extraIndexList = slNameListFromString(extraIndex, ',');
struct asObject *as = asParseText(asText);
asCompareObjAgainstStandardBed(as, bedN, TRUE);  // abort if bedN columns are not standard
bbFileCreate(inName, chromSizes, blockSize, itemsPerSlot, asText, as, doCompress,
    extraIndexList, outName);
}

int main(int argc, char *argv[])
/* Process command line. */
{
optionInit(&argc, argv, options);
blockSize = optionInt("blockSize", blockSize);
itemsPerSlot = optionInt("itemsPerSlot", itemsPerSlot);
asFile = optionVal("as", asFile);
doCompress = !optionExists("unc");
extraIndex = optionVal("extraIndex", NULL);
tabSep = optionExists("tab");
if (argc != 4)
    usage();
if (optionExists("type"))
    {
    // parse type
    char *btype = cloneString(optionVal("type", ""));
    char *plus = strchr(btype, '+');
    if (plus)
        {
        *plus++ = 0;
        if (isdigit(*plus))
            bedP = sqlUnsigned(plus);
        }
    if (!startsWith("bed", btype))
        errAbort("type must begin with \"bed\"");
    btype += 3;
    bedN = sqlUnsigned(btype);
    if (bedN < 3)
        errAbort("Bed must be 3 or higher, found %d\n", bedN);
    if (bedN > 15)
        errAbort("Bed must be 15 or lower, found %d\n", bedN);
    }
else
    {
    if (asFile)
        errAbort("If you specify the .as file, you must specify the -type as well so that\n"
                 "the number of standard BED columns is known.");
    }

/* If they haven't set bedN and bedP from the type var in the command line, then we sniff it
 * out from the file. */
char *bedFileName = argv[1];
if (bedN == 0)
    {
    /* Just read in single line and count fields. */
    struct lineFile *lf = lineFileOpen(bedFileName, TRUE);
    char *line;
    if (!lineFileNextReal(lf, &line))
        errAbort("%s is empty", lf->fileName);
    int fieldCount;
    if (tabSep)
        fieldCount = chopByChar(line, '\t', NULL, 0);  // Do not use chopString, see GOTCHA
    else
        fieldCount = chopByWhite(line, NULL, 0);
    if (fieldCount > 256)
        errAbort("Too many columns in %s, you sure it's a bed file?", lf->fileName);
    lineFileClose(&lf);

    /* Set up so that it looks like we are straight up bed for that many fields,
     * or if more than our maximum defined fields, then for bed15+ */
    bedN = fieldCount;
    if (bedN > bedKnownFields)
        {
        bedP = bedN - bedKnownFields;
        bedN = bedKnownFields;
        }
    }
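/* (Added illustration of the -type parsing and sniffing above: -type=bed6+3
 * yields bedN=6 and bedP=3; -type=bed6+ yields bedN=6 with the extra field
 * count left to the -as file; with no -type at all, a 9-column input is taken
 * as bed9, and an 18-column one as bed15+3, since bedKnownFields caps the
 * standard fields at 15.) */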
/* Make sure that fields are defined, from bed spec if nowhere else. */
if (asFile)
    readInGulp(asFile, &asText, NULL);
else
    asText = bedAsDef(bedN, bedN + bedP);

bedToBigBed(bedFileName, argv[2], argv[3]);
optionFree();
if (verboseLevel() > 1)
    printVmPeak();
return 0;
}
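/* (Added usage sketch assembled from the usage() text above; file names and
 * the hg38 database are illustrative only:
 *
 *   sort -k1,1 -k2,2n unsorted.bed > sorted.bed
 *   fetchChromSizes hg38 > hg38.chrom.sizes
 *   bedToBigBed -type=bed6+3 -as=fields.as -extraIndex=name \
 *       sorted.bed hg38.chrom.sizes out.bb
 */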