#include "hdf5.h" #include "hdf5_hl.h" #include #include #include #define SAMPLES 20 #define MAXLINES 100 #define CHROMLEN 6 #define IDLEN 12 #define FILTERLEN 8 #define INFOLEN 300 #define FORMATLEN 50 #define NGENOFIELDS (hsize_t) 9 #define NSITEFIELDS (hsize_t) 9 #define NRECORDS (hsize_t) 512122 typedef struct Geno { int GT_0; int GT_1; int AD_ref; int AD_alt; int DP; float GQ; int AA_like; int AB_like; int BB_like; } Geno; typedef struct Site{ char chrom[CHROMLEN]; int pos; char id[IDLEN]; char ref; char alt; float qual; char filter[FILTERLEN]; char info[INFOLEN]; char format[FORMATLEN]; } Site; int main(int argc, char *argv[]){ if (argc<2){ printf("no filename. %i args\n",argc); return -1; } Geno* dst_buf=malloc(sizeof(Geno)*NRECORDS); /* Calculate the size and the offsets of our struct members in memory */ size_t dst_size = sizeof(Geno); size_t dst_offset[NGENOFIELDS] = { HOFFSET(Geno, GT_0 ), HOFFSET(Geno, GT_1 ), HOFFSET(Geno, AD_ref ), HOFFSET(Geno, AD_alt ), HOFFSET(Geno, DP ), HOFFSET(Geno, GQ ), HOFFSET(Geno, AA_like ), HOFFSET(Geno, AB_like ), HOFFSET(Geno, BB_like ), }; size_t dst_sizes[NGENOFIELDS] = { sizeof(dst_buf[0].GT_0), sizeof(dst_buf[0].GT_1), sizeof(dst_buf[0].AD_ref), sizeof(dst_buf[0].AD_ref), sizeof(dst_buf[0].DP), sizeof(dst_buf[0].GQ), sizeof(dst_buf[0].AA_like), sizeof(dst_buf[0].AB_like), sizeof(dst_buf[0].BB_like), }; /* Define field information */ const char *geno_field_names[NGENOFIELDS] = { "GT_0", "GT_1", "AD_ref", "AD_alt", "DP", "GQ", "AA_like", "AB_like", "BB_like" }; hid_t geno_field_type[NGENOFIELDS]; hid_t file_id; hsize_t chunk_size = 10; int *fill_data = NULL; int compress = 0; herr_t status; /* Initialize field_type */ geno_field_type[0] = H5T_NATIVE_INT; geno_field_type[1] = H5T_NATIVE_INT; geno_field_type[2] = H5T_NATIVE_INT; geno_field_type[3] = H5T_NATIVE_INT; geno_field_type[4] = H5T_NATIVE_INT; geno_field_type[5] = H5T_NATIVE_FLOAT; geno_field_type[6] = H5T_NATIVE_INT; geno_field_type[7] = H5T_NATIVE_INT; geno_field_type[8] = H5T_NATIVE_INT; /* Create a new file using default properties. */ file_id = H5Fcreate(argv[1], H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT ); hid_t group_id; /* identifiers */ /* Create a group */ group_id = H5Gcreate(file_id, "/Genotypes", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); Site * site_buf=malloc(sizeof(Site)*NRECORDS); /* Calculate the size and the offsets of our struct members in memory */ size_t site_size = sizeof(Site); /* Initialize field_type */ hid_t string_chrom = H5Tcopy( H5T_C_S1 ); H5Tset_size( string_chrom, CHROMLEN); hid_t string_id = H5Tcopy( H5T_C_S1 ); H5Tset_size( string_id, IDLEN); hid_t string_filter = H5Tcopy( H5T_C_S1 ); H5Tset_size( string_filter, FILTERLEN); hid_t string_info = H5Tcopy( H5T_C_S1 ); H5Tset_size( string_info, INFOLEN); hid_t string_format = H5Tcopy( H5T_C_S1 ); H5Tset_size( string_format, FORMATLEN); const char *site_field_names[NSITEFIELDS] = {"chrom","pos","id","ref","alt","qual","filter","info","format" }; hid_t site_field_type[NSITEFIELDS]; site_field_type[0] = string_chrom; site_field_type[1] = H5T_NATIVE_INT; site_field_type[2] = string_id; site_field_type[3] = H5T_NATIVE_CHAR; site_field_type[4] = H5T_NATIVE_CHAR; site_field_type[5] = H5T_NATIVE_FLOAT; site_field_type[6] = string_filter; site_field_type[7] = string_info; site_field_type[8] = string_format; size_t site_offset[NSITEFIELDS] = { HOFFSET(Site, chrom ), HOFFSET(Site, pos ), HOFFSET(Site, id ), HOFFSET(Site, ref ), HOFFSET(Site, alt), HOFFSET(Site, qual ), HOFFSET(Site, filter ), HOFFSET(Site, info ), HOFFSET(Site, format ), }; Site site; size_t site_sizes[NSITEFIELDS] = { sizeof(site.chrom ), sizeof(site.pos ), sizeof(site.id), sizeof(site.ref ), sizeof(site.alt), sizeof(site.qual ), sizeof(site.filter ), sizeof(site.info ), sizeof(site.format ), }; //array of array of genotypes Geno * genos[SAMPLES]; int p; for (p=0; p