/*
 * Record layouts and size constants for storing per-sample genotype tables
 * and a per-site table in an HDF5 file, followed by the program entry point.
 */
#include "hdf5.h"
#include "hdf5_hl.h"
#include <stdio.h>   /* printf */
#include <stdlib.h>  /* malloc */
#include <string.h>  /* NOTE(review): the third header name was lost in extraction;
                        <string.h> is assumed -- confirm against the original file. */

#define SAMPLES 17    /* number of per-sample genotype buffers (see genos[] in main) */
#define CHROMLEN 6    /* byte size of Site.chrom  */
#define IDLEN 12      /* byte size of Site.id     */
#define FILTERLEN 8   /* byte size of Site.filter */
#define INFOLEN 30    /* byte size of Site.info   */
#define FORMATLEN 30  /* byte size of Site.format */

/*
 * Field counts. They size the per-field name/offset/type arrays in main, so
 * each must equal the number of members in the matching struct. NGENOFIELDS
 * used to be 9 although Geno has 7 members and every Geno field array is
 * given exactly 7 entries; anything that consumed 9 entries would have read
 * NULL names and uninitialised type ids.
 */
#define NGENOFIELDS ((hsize_t)7)
#define NSITEFIELDS ((hsize_t)9)

/* Number of records each buffer is allocated for (reduced run). */
#define NRECORDS ((hsize_t)6524)
/* Previously commented-out alternative: NRECORDS (hsize_t) 65248402 */

/*
 * One genotype record. All members are plain ints and are registered as
 * H5T_NATIVE_INT fields in main.
 * NOTE(review): the member names look like VCF per-sample keys -- confirm
 * their exact meaning with the data producer.
 */
typedef struct Geno {
    int GT_0;
    int GT_1;
    int ATG;
    int MQ;
    int HCG;
    int GQ;
    int DP;
} Geno;

/*
 * One site record. Text members are fixed-size char arrays whose sizes are
 * the *LEN macros above; ref and alt hold a single char each.
 * NOTE(review): member names presumably mirror the fixed VCF columns -- confirm.
 */
typedef struct Site {
    char  chrom[CHROMLEN];
    int   pos;
    char  id[IDLEN];
    char  ref;
    char  alt;
    float qual;
    char  filter[FILTERLEN];
    char  info[INFOLEN];
    char  format[FORMATLEN];
} Site;

int main(int argc, char *argv[]){
    /* At least one command-line argument is required. */
    if (argc<2){
        printf("no filename. %i args\n",argc);
        return -1;
    }

    /* NOTE(review): the malloc result is not checked before later use. */
    Geno* dst_buf=malloc(sizeof(Geno)*NRECORDS);

    /* Calculate the size and the offsets of our struct members in memory */
    size_t dst_size = sizeof(Geno);
    size_t dst_offset[NGENOFIELDS] = { HOFFSET(Geno, GT_0 ),
                                       HOFFSET(Geno, GT_1),
                                       HOFFSET(Geno, ATG),
                                       HOFFSET(Geno, MQ ),
                                       HOFFSET(Geno, HCG ),
                                       HOFFSET(Geno, GQ ),
                                       HOFFSET(Geno, DP ) };

    /* Define field information */
    const char *geno_field_names[NGENOFIELDS] = { "GT_0", "GT_1", "ATG", "MQ", "HCG", "GQ", "DP" };
    hid_t geno_field_type[NGENOFIELDS];
    hid_t file_id;
    hsize_t chunk_size = 10;
    int *fill_data = NULL;
    int compress = 0;
    herr_t status;

    /* Initialize field_type */
    geno_field_type[0] = H5T_NATIVE_INT;
    geno_field_type[1] = H5T_NATIVE_INT;
    geno_field_type[2] = H5T_NATIVE_INT;
    geno_field_type[3] = H5T_NATIVE_INT;
    geno_field_type[4] = H5T_NATIVE_INT;
    geno_field_type[5] = H5T_NATIVE_INT;
    geno_field_type[6] = H5T_NATIVE_INT;

    /* Create a new file using default properties.
*/ file_id = H5Fcreate( "strains17.h5", H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT ); hid_t group_id; /* identifiers */ /* Create a group */ group_id = H5Gcreate(file_id, "/Genotypes", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); Site * site_buf=malloc(sizeof(Site)*NRECORDS); /* Calculate the size and the offsets of our struct members in memory */ size_t site_size = sizeof(Site); /* Initialize field_type */ hid_t string_chrom = H5Tcopy( H5T_C_S1 ); H5Tset_size( string_chrom, CHROMLEN); hid_t string_id = H5Tcopy( H5T_C_S1 ); H5Tset_size( string_id, IDLEN); hid_t string_filter = H5Tcopy( H5T_C_S1 ); H5Tset_size( string_filter, FILTERLEN); hid_t string_info = H5Tcopy( H5T_C_S1 ); H5Tset_size( string_info, INFOLEN); hid_t string_format = H5Tcopy( H5T_C_S1 ); H5Tset_size( string_format, FORMATLEN); const char *site_field_names[NSITEFIELDS] = {"chrom","pos","id","ref","alt","qual","filter","info","format" }; hid_t site_field_type[NSITEFIELDS]; site_field_type[0] = string_chrom; site_field_type[1] = H5T_NATIVE_INT; site_field_type[2] = string_id; site_field_type[3] = H5T_NATIVE_CHAR; site_field_type[4] = H5T_NATIVE_CHAR; site_field_type[5] = H5T_NATIVE_FLOAT; site_field_type[6] = string_filter; site_field_type[7] = string_info; site_field_type[8] = string_format; size_t site_offset[NSITEFIELDS] = { HOFFSET(Site, chrom ), HOFFSET(Site, pos ), HOFFSET(Site, id ), HOFFSET(Site, ref ), HOFFSET(Site, alt), HOFFSET(Site, qual ), HOFFSET(Site, filter ), HOFFSET(Site, info ), HOFFSET(Site, format ), }; Site site; //array of array of genotypes Geno * genos[SAMPLES]; int p; for (p=0; p