/* udcFuse - FUSE (Filesystem in USErspace) filesystem for lib/udc.c (Url Data Cache). */

#ifdef USE_FUSE

#include "common.h"
#include "portable.h"
#include "errCatch.h"
#include "udc.h"
/* NOTE(review): the names of the three system headers below were missing from
 * the source as received; they were chosen from what this file uses
 * (opendir/readdir, O_* open flags, pthread_self) -- confirm against the build. */
#include <dirent.h>
#include <fcntl.h>
#include <pthread.h>
#include <errno.h>
#include <stdint.h>
#include <sys/stat.h>

#ifndef FUSE_USE_VERSION
#define FUSE_USE_VERSION 26
#endif
#include "fuse.h"

void usage(void)
/* Explain usage and exit. */
{
    errAbort(
        "udcFuse - FUSE (Filesystem in USErspace) filesystem for lib/udc.c (Url Data Cache)\n"
        "usage:\n"
        " udcFuse [options] emptyDirMountPoint [udcCacheDir]\n"
        "options:\n"
        " -d: run in debug mode\n"
        );
}

// Important bits from http://sourceforge.net/apps/mediawiki/fuse/index.php?title=FuseInvariants:
// --------------------------------------------------------------------------
// * All requests are absolute, i.e. all paths begin with / and
//   include the complete path to a file or a directory. Symlinks,
//   . and .. are already resolved.
// * For every request you can get except for getattr(), read() and
//   write(), usually for every path argument (both source and
//   destination for link and rename, but only the source for
//   symlink), you will get a getattr() request just before the
//   callback.
//   For example, suppose I store file names of files in a filesystem
//   also into a database. To keep data in sync, I would like, for
//   each filesystem operation that succeeds, to check if the file
//   exists on the database. I just do this in the getattr() call,
//   since all other calls will be preceded by a getattr.
// * The arguments for every request are already verified as much as
//   possible. This means that, for example
//   * readdir() is only called with an existing directory name
//   ...
//   * read() and write() are only called if the file has been opened
//     with the correct flags
// --------------------------------------------------------------------------

// This code is run on behalf of a kernel module, so it can't just bail out
// when there is a problem. Wrap errCatch (which has been made pthread-safe)
// around any calls to kent/src code.

// ERR_CATCH_START opens a scope, allocates an errCatch and enters the
// protected region. It must be paired with ERR_CATCH_END in the same function.
#define ERR_CATCH_START() \
    { \
    struct errCatch *catch = errCatchNew(); \
    if (errCatchStart(catch)) \
        {

// Code that can errAbort goes between ERR_CATCH_START and ERR_CATCH_END,
// calling ERR_CATCH_FREE if it does its own return statement:
#define ERR_CATCH_FREE() errCatchFree(&catch)

// ERR_CATCH_END closes the protected region. If an error was caught it logs
// msg plus the caught message to stderr, frees the errCatch and makes the
// ENCLOSING FUNCTION return -1; otherwise it just frees the errCatch.
#define ERR_CATCH_END(msg) \
        } \
    errCatchEnd(catch); \
    if (catch->gotError) \
        { \
        fprintf(stderr, "%s errCatch: %s", (msg), catch->message->string); \
        ERR_CATCH_FREE(); \
        return -1; \
        } \
    ERR_CATCH_FREE(); \
    }

static int checkForFile(const char *path, char *udcCachePath, struct stat *stbuf, int pid)
/* When a udc cache directory has "bitmap" and "sparseData" files, it
 * corresponds to a file URL and a udcFile object. Modify stbuf->st_mode
 * to reflect a file not a directory, and set stbuf->st_size from the cache.
 *   path         - fuse path (used to reconstruct the URL)
 *   udcCachePath - corresponding path inside the udc cache directory
 *   stbuf        - stat result for udcCachePath; updated in place
 *   pid          - caller's thread tag, used only in log messages
 * Returns 0 on success, -ENOENT for an empty directory, -errno if the
 * directory can't be opened, or -1 if a kent library call aborted. */
{
    // Only directories can be disguised cache files; leave anything else alone.
    if (!S_ISDIR(stbuf->st_mode))
        return 0;

    DIR *dirHandle = opendir(udcCachePath);
    if (dirHandle == NULL)
    {
        // Save errno before fprintf has a chance to overwrite it.
        int savedErrno = errno;
        fprintf(stderr, "...[%d] getattr: failed to opendir(%s)!: %s\n",
                pid, udcCachePath, strerror(savedErrno));
        return -savedErrno;
    }

    // should we make sure that there are not also subdirectories??
    boolean gotBitmap = FALSE, gotSparse = FALSE;
    boolean isEmpty = TRUE;
    struct dirent *dirInfo;
    while ((dirInfo = readdir(dirHandle)) != NULL)
    {
        if (sameString(dirInfo->d_name, ".") || sameString(dirInfo->d_name, ".."))
            continue;
        isEmpty = FALSE;
        if (sameString(dirInfo->d_name, "bitmap"))
            gotBitmap = TRUE;
        else if (sameString(dirInfo->d_name, "sparseData"))
            gotSparse = TRUE;
        if (gotBitmap && gotSparse)
            break;
    }
    // The scan is all we need the handle for; close it here so that none of
    // the return paths below (including the one hidden in ERR_CATCH_END) leak it.
    closedir(dirHandle);

    // fuse gets confused when a cache path is an empty dir, and then suddenly morphs
    // into a regular file, as happens when old cache files are removed by the
    // trash cleaner but then reappear due to a new udc access. So if empty dir,
    // just tell fuse that it doesn't exist.
    if (isEmpty)
        return -ENOENT;

    if (!gotBitmap && !gotSparse)
        return 0;

    if (gotBitmap != gotSparse)
        fprintf(stderr, "...[%d] getattr: got one cache file but not the other - stale?\n", pid);
    stbuf->st_mode &= ~(S_IFDIR | S_IXUSR | S_IXGRP | S_IXOTH);
    stbuf->st_mode |= S_IFREG;

    // Now we need to set the actual size in stbuf, otherwise fuse will think
    // the size is 4096 or however many bytes have been cached so far, and will
    // prevent callers from reading past that.
    char buf[4096];
    char *url = NULL;
    long long size = -1;
    ERR_CATCH_START();
    url = udcPathToUrl(path, buf, sizeof(buf), NULL);
    // udcPathToUrl can return NULL (udcfs_open checks for it too); don't pass that on.
    if (url != NULL)
        size = udcSizeFromCache(url, NULL);
    ERR_CATCH_END("udcPathToUrl or udcSizeFromCache");
    if (size < 0)
        fprintf(stderr, "...[%d] getattr: failed to get udc cache size for %s\n",
                pid, (url != NULL) ? url : path);
    else
        stbuf->st_size = size;
    return 0;
}

#define HTTP_PATH_PREFIX "/http/"
#define QENCODED_AT_SIGN "Q40"

static int fusePathToUdcPath(const char *path, char *udcPath, size_t udcPathSize)
/* The udc cache path is almost always just udcDefaultDir() + fuse path,
 * except when the fuse path includes qEncoded http auth info -- necessary for
 * reconstructing the URL, but not included in the udc cache path.
 * Writes the result into udcPath (capacity udcPathSize).
 * Return -1 for problem, 0 for OK. */
{
    if (startsWith(HTTP_PATH_PREFIX, path))
    {
        const char *httpHost = path + strlen(HTTP_PATH_PREFIX);
        const char *atSign = strstr(httpHost, QENCODED_AT_SIGN);
        const char *nextSlash = strchr(httpHost, '/');
        // Only treat the encoded '@' as auth info when it is in the host
        // component, i.e. before the first '/' after the prefix.
        if (atSign != NULL && (nextSlash == NULL || atSign < nextSlash))
        {
            ERR_CATCH_START();
            safef(udcPath, udcPathSize, "%s" HTTP_PATH_PREFIX "%s",
                  udcDefaultDir(), atSign+strlen(QENCODED_AT_SIGN));
            ERR_CATCH_END("safef udcPath (skipping auth)");
            return 0;
        }
    }
    ERR_CATCH_START();
    safef(udcPath, udcPathSize, "%s%s", udcDefaultDir(), path);
    ERR_CATCH_END("safef udcPath");
    return 0;
}

static int udcfs_getattr(const char *path, struct stat *stbuf)
/* According to http://sourceforge.net/apps/mediawiki/fuse/index.php?title=FuseInvariants ,
 * getattr() is called to test existence before every other command except read, write and
 * getattr itself. Give stat of corresponding udc cache file (but make it read-only).
 * Returns 0 on success, -errno if stat fails, otherwise checkForFile's result. */
{
    unsigned int pid = pthread_self();
    char udcCachePath[4096];
    if (fusePathToUdcPath(path, udcCachePath, sizeof(udcCachePath)) < 0)
        return -1;
    int res = stat(udcCachePath, stbuf);
    if (res != 0)
    {
        // Save errno before fprintf has a chance to overwrite it.
        int savedErrno = errno;
        fprintf(stderr, "...[%d] getattr: stat(%s) failed (%d): %s\n",
                pid, udcCachePath, res, strerror(savedErrno));
        return -savedErrno;
    }
    // Force read-only permissions:
    stbuf->st_mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH);
    int ret = checkForFile(path, udcCachePath, stbuf, pid);
    fprintf(stderr, "...[%d] getattr %s finish %ld\n", pid, path, clock1000());
    return ret;
}

static boolean isEmptyDir(char *udcCachePath, char *subdir)
/* Return TRUE if subdir of path is a directory with no children.
 * Note: if building the full path aborts, ERR_CATCH_END makes this function
 * return -1, which callers see as a non-zero (true) value. */
{
    boolean isDir = FALSE, isEmpty = TRUE;
    char fullPath[4096];
    ERR_CATCH_START();
    safef(fullPath, sizeof(fullPath), "%s/%s", udcCachePath, subdir);
    ERR_CATCH_END("safef fullPath");
    DIR *dirHandle = opendir(fullPath);
    if (dirHandle != NULL)
    {
        isDir = TRUE;
        struct dirent *dirInfo;
        while ((dirInfo = readdir(dirHandle)) != NULL)
        {
            if (sameString(dirInfo->d_name, ".") || sameString(dirInfo->d_name, ".."))
                continue;
            isEmpty = FALSE;
            break;
        }
        closedir(dirHandle);
    }
    return isDir && isEmpty;
}

static int udcfs_readdir(const char *path, void *buf, fuse_fill_dir_t filler,
                         off_t offset, struct fuse_file_info *fi)
/* Read the corresponding udc cache directory, passing each entry name to filler.
 * Returns -errno if the directory can't be opened, otherwise closedir's result. */
{
    unsigned int pid = pthread_self();
    char udcCachePath[4096];
    if (fusePathToUdcPath(path, udcCachePath, sizeof(udcCachePath)) < 0)
        return -1;
    DIR *dirHandle = opendir(udcCachePath);
    if (dirHandle == NULL)
    {
        // Save errno before fprintf has a chance to overwrite it.
        int savedErrno = errno;
        fprintf(stderr, "...[%d] readdir: opendir(%s) failed!: %s\n",
                pid, udcCachePath, strerror(savedErrno));
        return -savedErrno;
    }
    struct dirent *dirInfo;
    while ((dirInfo = readdir(dirHandle)) != NULL)
    {
        // getattr denies the existence of empty udcCache directories because they
        // might get bitmap and sparseData files and then we report them as regular
        // files not directories. Filter out components here that are empty directories:
        boolean isDotEntry = sameString(".", dirInfo->d_name) || sameString("..", dirInfo->d_name);
        if (!isDotEntry && isEmptyDir(udcCachePath, dirInfo->d_name))
            continue;
        // A non-zero return from filler ends the listing early.
        if (filler(buf, dirInfo->d_name, NULL, 0))
            break;
    }
    int ret = closedir(dirHandle);
    fprintf(stderr, "...[%d] readdir %s finish %ld\n", pid, path, clock1000());
    return ret;
}

static int udcfs_open(const char *path, struct fuse_file_info *fi)
/* Call udcFileMayOpen() and stash the handle in fi->fh for use by later calls.
 * Only read-only opens are accepted (-EACCES otherwise).
 * Returns 0 on success, -1 if the path can't be translated or opened. */
{
    if ((fi->flags & O_ACCMODE) != O_RDONLY)
        return -EACCES;
    unsigned int pid = pthread_self();
    fprintf(stderr, "...[%d] open(%s) start %ld\n", pid, path, clock1000());
    struct udcFile *udcf = NULL;
    boolean gotUrl = FALSE;
    // Don't return from inside the catch region: record the outcome and let
    // ERR_CATCH_END run so that errCatchEnd is always called.
    // NOTE(review): errCatchStart presumably registers state that errCatchEnd
    // unregisters -- confirm in errCatch.h.
    ERR_CATCH_START();
    char buf[4096];
    char *url = udcPathToUrl(path, buf, sizeof(buf), NULL);
    if (url != NULL)
    {
        gotUrl = TRUE;
        // Let the kernel keep its cached pages while the udc cache is still fresh.
        if (udcCacheAge(url, NULL) < udcCacheTimeout())
            fi->keep_cache = 1;
        udcf = udcFileMayOpen(url, NULL);
        fprintf(stderr, "...[%d] open -> udcFileMayOpen(%s) -> 0x%llx\n",
                pid, url, (long long)udcf);
    }
    ERR_CATCH_END("udcPathToUrl, udcCacheAge or udcFileMayOpen");
    if (!gotUrl)
    {
        fprintf(stderr, "...[%d] open: Unable to translate path %s to URL!\n", pid, path);
        return -1;
    }
    if (udcf == NULL)
    {
        fprintf(stderr, "...[%d] open: Unable to open udcFile for %s!\n", pid, path);
        return -1;
    }
    fi->fh = (uint64_t)(uintptr_t)udcf;
    fprintf(stderr, "...[%d] open fh=0x%llx finish %ld\n", pid, (long long)(fi->fh), clock1000());
    return 0;
}

static int udcfs_read(const char *path, char *buf, size_t size, off_t offset,
                      struct fuse_file_info *fi)
/* udcSeek to specified offset, udcRead size bytes into buf, return #bytes read.
 * Returns -1 if fi->fh holds no handle or if udcSeek/udcRead aborted. */
{
    unsigned int pid = pthread_self();
    fprintf(stderr, "...[%d] read(%s, size=%lld, offset=%lld, fh=0x%llx) start %ld\n",
            pid, path, (long long)size, (long long)offset, (long long)(fi->fh), clock1000());
    struct udcFile *udcf = (struct udcFile *)(uintptr_t)(fi->fh);
    if (udcf == NULL)
    {
        fprintf(stderr, "...[%d] read: fuse_file_info fh is NULL -- can't read.\n", pid);
        return -1;
    }
    ERR_CATCH_START();
    udcSeek(udcf, (bits64)offset);
    size = udcRead(udcf, buf, size);
    ERR_CATCH_END("udcSeek or udcRead");
    fprintf(stderr, "...[%d] read %lld bytes finish %ld\n", pid, (long long)size, clock1000());
    return (int)size;
}

static int udcfs_release(const char *path, struct fuse_file_info *fi)
/* Close the udcFile stored as fi->fh.
 * Returns 0 on success, -1 if udcFileClose aborted. */
{
    unsigned int pid = pthread_self();
    fprintf(stderr, "...[%d] release %s (0x%llx) %ld\n",
            pid, path, (long long)(fi->fh), clock1000());
    // Go through a real pointer variable instead of reinterpreting the
    // uint64_t field's address as a struct udcFile **.
    struct udcFile *udcf = (struct udcFile *)(uintptr_t)(fi->fh);
    ERR_CATCH_START();
    udcFileClose(&udcf);
    ERR_CATCH_END("udcFileClose");
    // Store back whatever udcFileClose left in the pointer.
    fi->fh = (uint64_t)(uintptr_t)udcf;
    return 0;
}

/* The set of filesystem operations this program implements for fuse. */
static struct fuse_operations udcfs_oper = {
    .getattr = udcfs_getattr,
    .readdir = udcfs_readdir,
    .open = udcfs_open,
    .read = udcfs_read,
    .release = udcfs_release,
};

void checkUdcCacheDir(void)
/* Make sure udcDefaultDir() is a readable directory; print an error and exit(1) if not. */
{
    DIR *udcCacheHandle = opendir(udcDefaultDir());
    if (udcCacheHandle == NULL)
    {
        fprintf(stderr, "Error: Can't open udc local cache directory '%s': %s\n",
                udcDefaultDir(), strerror(errno));
        exit(1);
    }
    closedir(udcCacheHandle);
}

#ifdef UDC_TEST
// Helpers for the TEST MAIN below. These live at file scope because a
// function defined inside main() is a compiler extension, not standard C.

#define TESTFILLER_BUFSIZE 256

static int testFiller(void *buf, const char *name, const struct stat *stbuf, off_t off)
// Impersonate fuse's readdir callback (type fuse_fill_dir_t)
{
    printf(" -> testFiller(%s)\n", name);
    return 0;
}

// Print the error for a negative (negated-errno) return value and exit.
#define checkRet(ret) \
    do \
        { \
        if ((ret) < 0) \
            { \
            printf("Doh!: %s\n", strerror(-(ret))); \
            exit(1); \
            } \
        } \
    while (0)

#define UDC_TEST_PATH "/ftp/ftp-trace.ncbi.nih.gov/1000genomes/ftp/pilot_data/data/NA12878/alignment/NA12878.chrom22.SLX.maq.SRP000032.2009_07.bam"
#define UDC_TEST_PATH2 "/ftp/ftp-trace.ncbi.nih.gov/1000genomes/ftp/pilot_data/data/NA12878/alignment/NA12878.chrom21.SLX.maq.SRP000032.2009_07.bam"
#define UDC_TEST_PATH3 "/ftp/ftp-trace.ncbi.nih.gov/1000genomes/ftp/pilot_data/data/NA12878/alignment/NA12878.chrom9.SLX.maq.SRP000032.2009_07.bam"
#endif//def UDC_TEST

int main(int argc, char *argv[])
/* udcFuse - FUSE (Filesystem in USErspace) filesystem for lib/udc.c (Url Data Cache). */
{
    // Every argument starting with '-' raises the number of arguments we
    // require, so options are not counted as the mount point or cache dir.
    int minArgc = 2;
    int i;
    for (i = 1; i < argc; i++)
    {
        if (argv[i][0] == '-')
            minArgc++;
    }
    if (argc < minArgc || argc > minArgc+1)
        usage();
    if (argc == minArgc+1)
    {
        udcSetDefaultDir(argv[argc-1]);
        // Fuse does not like getting an extra arg.
        argc--;
    }
    // Use kernel caching, and tell udc not to ping server, if cache files are
    // less than an hour old. (Should make this a command-line opt.)
    udcSetCacheTimeout(3600);

#ifndef UDC_TEST
    return fuse_main(argc, argv, &udcfs_oper, NULL);
#else
    // TEST MAIN -- don't call fuse, just call methods the way we imagine
    // fuse would call them.
    udcfs_oper.getattr = udcfs_oper.getattr; // avoid unused-var warning.
    struct fuse_file_info fi;
    memset(&fi, 0, sizeof(fi));
    struct stat stbuf;
    char buf[TESTFILLER_BUFSIZE];
    memset(buf, 0, sizeof(buf));
    // Bytes read into buf are copied into these for printing, rather than
    // reading buf through a pointer of a different type.
    unsigned int word = 0;
    unsigned long long quad = 0;
    int ret;

    ret = udcfs_getattr(UDC_TEST_PATH, &stbuf);
    printf("Got %d from getattr; stbuf.st_mode=0%llo\n\n", ret, (long long)stbuf.st_mode);
    checkRet(ret);

    ret = udcfs_readdir("/", buf, testFiller, 0, &fi);
    printf("Got %d from readdir\n\n", ret);
    checkRet(ret);

    ret = udcfs_readdir("/ftp", buf, testFiller, 0, &fi);
    printf("Got %d from readdir\n\n", ret);
    checkRet(ret);

    ret = udcfs_open(UDC_TEST_PATH, &fi);
    printf("Got %d from open -> udc handle 0x%llx\n\n", ret, (long long)(fi.fh));
    checkRet(ret);

    ret = udcfs_read(UDC_TEST_PATH, buf, 4, 0, &fi);
    memcpy(&word, buf, sizeof(word));
    printf("Got %d bytes: 0x%x from read @0 on 0x%llx!\n\n", ret, word, (long long)(fi.fh));
    checkRet(ret);

    // Make sure we can have two open handles on the same file at the same time:
    struct fuse_file_info fi2;
    memset(&fi2, 0, sizeof(fi2));
    ret = udcfs_open(UDC_TEST_PATH2, &fi2);
    printf("Got %d from open -> second udc handle 0x%llx\n\n", ret, (long long)(fi2.fh));
    checkRet(ret);

    ret = udcfs_read(UDC_TEST_PATH2, buf, 4, 8, &fi2);
    memcpy(&word, buf, sizeof(word));
    printf("Got %d bytes: 0x%x from read @8 on second handle 0x%llx!\n\n",
           ret, word, (long long)(fi2.fh));
    checkRet(ret);

    ret = udcfs_read(UDC_TEST_PATH2, buf, 4, 8, &fi);
    memcpy(&word, buf, sizeof(word));
    printf("Got %d bytes: 0x%x from read @8 on first handle 0x%llx!\n\n",
           ret, word, (long long)(fi.fh));
    checkRet(ret);

    ret = udcfs_read(UDC_TEST_PATH, buf, 8, 9000, &fi2);
    memcpy(&quad, buf, sizeof(quad));
    printf("Got %d bytes: 0x%llx from read @9000 on second handle 0x%llx!\n\n",
           ret, quad, (long long)(fi2.fh));
    checkRet(ret);

    ret = udcfs_release(UDC_TEST_PATH2, &fi2);
    printf("Got %d from release of second handle; now fi2.fh is 0x%llx\n\n",
           ret, (long long)(fi2.fh));
    checkRet(ret);

    ret = udcfs_read(UDC_TEST_PATH, buf, 8, 9000, &fi);
    memcpy(&quad, buf, sizeof(quad));
    printf("Got %d bytes: 0x%llx from read @9000 on 0x%llx!\n\n", ret, quad, (long long)(fi.fh));
    checkRet(ret);

    ret = udcfs_release(UDC_TEST_PATH, &fi);
    printf("Got %d from release; now fi.fh is 0x%llx\n\n", ret, (long long)(fi.fh));
    checkRet(ret);

    // Now try to getattr something that has not (at the moment anyway) yet been opened in udc first:
    memset(&stbuf, 0, sizeof(stbuf));
    ret = udcfs_getattr(UDC_TEST_PATH3, &stbuf);
    printf("Got %d from getattr; stbuf.st_mode=0%llo\n\n", ret, (long long)stbuf.st_mode);
    checkRet(ret);
    return 0;
#endif//def UDC_TEST
}

#else // no USE_FUSE

#include <stdio.h>

int main(int argc, char *argv[])
/* Without FUSE support compiled in, just explain how to enable it. */
{
    printf("udcFuse requires the FUSE (filesystem in userspace) library -- make sure that is installed and add USE_FUSE=1 to your enviroment.\n");
    return 0;
}

#endif//def USE_FUSE