#!/usr/bin/env python

# Author: Heng Li and Aaron Quinlan
# License: MIT/X11

"""ctypes-based Python interface to the tabix shared library.

Works under both Python 2 and Python 3: text arguments are encoded to byte
strings before they are handed to the C library, and records read back are
returned as the interpreter's native ``str`` type.
"""

import sys
from ctypes import *
from ctypes.util import find_library
import glob
import platform


def _to_bytes(text, encoding='utf-8'):
    """Return *text* as a byte string suitable for a ``c_char_p`` argument."""
    if isinstance(text, bytes):
        return text
    return text.encode(encoding)


def _to_native(raw):
    """Return the byte string *raw* as the interpreter's native ``str``."""
    if isinstance(raw, str):
        # Python 2: ``str`` already is the byte string type.
        return raw
    return raw.decode('utf-8', 'replace')


def load_shared_library(lib, _path='.', ver='*'):
    """Search for and load the shared library named *lib*.

    The system library path is searched first.  If that fails, ``_path``
    (the current directory by default) is searched for a file named
    ``lib<lib><ver>.dylib`` on Mac OS X or ``lib<lib>.so<ver>`` on Linux,
    where ``ver`` is a glob pattern.

    Returns the loaded ``CDLL`` object, or None when the platform is not
    supported or when the custom directory does not contain exactly one
    matching file.
    """
    # find from the system path
    path = find_library(lib)
    if path is None:
        # if fail, search in the custom directory
        system = platform.system()
        if system == 'Darwin':
            suffix = ver + '.dylib'
        elif system == 'Linux':
            suffix = '.so' + ver
        else:
            # No known naming scheme for shared libraries on this platform.
            return None
        candidates = glob.glob(_path + '/lib' + lib + suffix)
        if len(candidates) != 1:
            # Nothing found, or the match is ambiguous.
            return None
        path = candidates[0]
    return CDLL(path)


def tabix_init():
    """Load the tabix library and declare the signatures of the used APIs.

    Returns the configured library object for use by the Tabix class, or
    None if the shared library could not be located.
    """
    tabix = load_shared_library('tabix')
    if tabix is None:
        return None
    tabix.ti_read.restype = c_char_p
    # on Mac OS X 10.6, the following declarations are required.
    tabix.ti_open.argtypes = [c_char_p, c_char_p]
    tabix.ti_open.restype = c_void_p
    tabix.ti_querys.argtypes = [c_void_p, c_char_p]
    tabix.ti_querys.restype = c_void_p
    tabix.ti_query.argtypes = [c_void_p, c_char_p, c_int, c_int]
    tabix.ti_query.restype = c_void_p
    tabix.ti_read.argtypes = [c_void_p, c_void_p, c_void_p]
    tabix.ti_iter_destroy.argtypes = [c_void_p]
    tabix.ti_close.argtypes = [c_void_p]
    # FIXME: explicit declarations for APIs not used in this script
    return tabix


# OOP interface
class Tabix:
    """Reader for a tabix-indexed file.

    Attributes:
        tabix: the loaded tabix library, or None if it is unavailable.
        fp: the handle returned by ``ti_open``, or None if nothing is open.
    """

    def __init__(self, fn, fnidx=0):
        """Open the file *fn*.

        *fnidx* is the name of the index file; a false value (the default)
        passes a NULL pointer to ``ti_open``.  Failures are reported on
        stderr rather than raised; fetch() then yields nothing.
        """
        # Always define fp so that __del__ and fetch() can rely on it.
        self.fp = None
        self.tabix = tabix_init()
        if self.tabix is None:
            sys.stderr.write("[Tabix] Please make sure the shared library is compiled and available.\n")
            return
        fs_encoding = sys.getfilesystemencoding() or 'utf-8'
        index_name = _to_bytes(fnidx, fs_encoding) if fnidx else None
        self.fp = self.tabix.ti_open(_to_bytes(fn, fs_encoding), index_name)
        if self.fp is None:
            sys.stderr.write("[Tabix] Failed to open the file.\n")

    def __del__(self):
        """Close the underlying file handle, if one was opened."""
        # getattr() guards against instances whose __init__ did not complete.
        lib = getattr(self, 'tabix', None)
        handle = getattr(self, 'fp', None)
        if lib and handle:
            lib.ti_close(handle)
            self.fp = None

    def fetch(self, chr, start=-1, end=-1):
        """Generator function that will yield each interval within the
        requested range from the requested file.

        If *start* is negative, *chr* is passed to ``ti_querys`` as a region
        string such as "chr2:1,000-2,000" or "chr2".  Otherwise *chr* must be
        a sequence name and is passed to ``ti_query`` together with *start*
        and *end*.

        Nothing is yielded when the library or the file is unavailable, or
        when the query is rejected (a message is then written to stderr).
        """
        if self.tabix is None or self.fp is None:
            return
        region = _to_bytes(chr)
        if start < 0:
            # chr looks like: "chr2:1,000-2,000" or "chr2"
            it = self.tabix.ti_querys(self.fp, region)
        else:
            # chr must be a sequence name
            it = self.tabix.ti_query(self.fp, region, start, end)
        if it is None:
            sys.stderr.write("[Tabix] Malformatted query or wrong sequence name.\n")
            return
        try:
            while True:  # iterate
                record = self.tabix.ti_read(self.fp, it, None)
                if record is None:
                    break
                yield _to_native(record)
        finally:
            # Runs on exhaustion, on an exception, and when the consumer
            # abandons the generator early, so the iterator never leaks.
            self.tabix.ti_iter_destroy(it)


# command-line interface
def main():
    """Print every record of the file argv[1] that falls in region argv[2]."""
    if len(sys.argv) < 3:
        sys.stderr.write("Usage: tabix.py FILE.gz REGION\n")
        sys.exit(1)
    # report the features in the requested interval
    tabix = Tabix(sys.argv[1])
    for line in tabix.fetch(sys.argv[2]):
        print(line)


if __name__ == '__main__':
    main()