/* encodeCvToDb - Make a relational database version of cv.ra. */

/* Currently this is implemented in three main steps:
 *    1) Read in cv.ra into a list of stanzaTypes.
 *    2) Rearrange stanzaTypes a bit to reduce redundancy and standardize names.
 *    3) Output in a variety of formats.  In some formats will stream through cv.ra again.
 * In other implementation notes, this module is deliberately casual about memory
 * handling.  It assumes nothing will be freed, a valid strategy since cv.ra
 * is quite small, only about 1/2 meg even after 4 years of use. */

#include "common.h"
#include "linefile.h"
#include "hash.h"
#include "options.h"
#include "portable.h"
#include "obscure.h"
#include "ra.h"

char *tablePrefix = "";  /* prefix to give to table names. */

void usage()
/* Explain usage and exit. */
{
errAbort(
  "encodeCvToDb - Make a relational database version of cv.ra\n"
  "usage:\n"
  " encodeCvToDb cv.ra out.stats out.atree out.as out.sql out.django outTabDir\n"
  "options:\n"
  " -tablePrefix=cvDb_ - Some prefix to prepend to all table names - for benefit of Django\n"
  );
}

static struct optionSpec options[] = {
   /* tablePrefix is documented in usage() and read in main(), so it has to be declared here. */
   {"tablePrefix", OPTION_STRING},
   {NULL, 0},
};

enum sfType {sftString = 0, sftUnsigned, sftInt, sftFloat, sftLongString};
/* Possible types of a simple field. */

struct stanzaField
/* Information about a field (type of line) with a given stanza */
    {
    struct stanzaField *next;
    char *name;                   /* Name of field, first word in line containing field. */
    char *symbol;                 /* Similar to name, but no white space and always camelCased. */
    struct hash *uniq;            /* Count-valued hash of all observed values. */
    enum sfType valType;          /* Type of values we've seen */
    int count;                    /* Number of times field used */
    int countFloat;               /* Number of times contents of field are a float-compatible format. */
    int countUnsigned;            /* Number of times contents of field could be unsigned int */
    int countInt;                 /* Number of times contents of field could be signed int */
    int maxSize;                  /* Longest observed size. */
    struct stanzaField *children; /* Allows fields to be in a tree. */
    };

struct stanzaType
/* A particular type of stanza, as defined by the type field within the stanza */
    {
    struct stanzaType *next;
    char *name;                        /* Name of type - something like Cell Line. */
    char *symbol;                      /* Similar to name, but no white space and always camelCased. */
    int count;                         /* Number of times stanza type observed. */
    int lastId;                        /* Last ID we'll use in output to identify row in table. */
    struct stanzaField *fieldList;     /* Fields, in order of observance. */
    FILE *f;                           /* File to output data in if any */
    struct stanzaType *splitSuccessor; /* If split, the next table in the split. */
    };

struct fieldLabel
/* Information on labels that describe a field.  Just used to make a big static table */
    {
    char *typePattern;  /* Types this applies to.  Put more specific types first. */
    char *fieldName;    /* Name of field */
    char *description;  /* The actual comment. */
    };

struct fieldLabel fieldDescriptions[] = {
/* Descriptions of fields, with types being from most to least specific. */
   {"cellType", "lineage", "High level developmental lineage of cell."},
   {"cellType", "tier", "ENCODE cell line tier. 1-3 with 1 being most commonly used, 3 least."},
   {"cellType", "protocol", "Scientific protocol used for growing cells"},
   {"cellType", "category", "Category of cell source - Tissue, primaryCells, etc."},
   {"cellType", "childOf", "Name of cell line or tissue this cell is descended from."},
   {"cellType", "derivedFrom", "Tissue or other souce of original cells. Depreciated?"},
   {"antibody", "validation", "How antibody was validated to be specific for target."},
   {"antibody", "displayName", "Descriptive short but not necessarily unique name for antibody."},
   {"antibody", "antibodyDescription", "Short description of antibody itself."},
   {"antibody", "target", "Molecular target of antibody."},
   {"antibody", "targetDescription", "Short description of antibody target."},
   {"antibody", "targetUrl", "Web page associated with antibody target."},
   {"antibody", "targetId", "Identifier for target, prefixed with source of ID, usually GeneCards"},
   {"antibody", "targetClass", "Classification of the biological function of the target gene"},
   {"antibodyTarget", "target", "Molecular target of antibody."},
   {"antibodyTarget", "targetDescription", "Short description of antibody target."},
   {"antibodyTarget", "externalUrl", "Web page associated with antibody target."},
   {"antibodyTarget", "externalId", "Identifier for target, prefixed with source of ID, usually GeneCards"},
   {"dataType", "dataGroup", "High level grouping of experimental assay type."},
   {"age", "stage", "High level place within life cycle of donor organism."},
   {"lab", "labInst", "The institution where the lab is located."},
   {"lab", "labPi", "Last name or other short identifier for lab's primary investigator"},
   {"lab", "labPiFull", "Full name of lab's primary investigator."},
   {"lab", "grantPi", "Last name of primary investigator on grant paying for data."},
   {"grantee", "grantInst", "Name of instution awarded grant paying for data."},
   {"grantee", "projectName", "Short name describing grant."},
   {"typeOfTerm", "searchable", "Describes how to search for term in Genome Browser. 'No' for unsearchable."},
   {"typeOfTerm", "cvDefined", "Is there a controlled vocabulary for this term. Is 'yes' or 'no.'"},
   {"typeOfTerm", "validate", "Describes how to validate field typeOfTerm refers to. Use 'none' for no validation."},
   {"typeOfTerm", "hidden", "Hide field in user interface? Can be 'yes' or 'no' or a release list"},
   {"typeOfTerm", "priority", "Order to display or search terms, lower is earlier."},
   {"typeOfTerm", "requiredVars", "Required fields for a term of this type."},
   {"typeOfTerm", "optionalVars", "Optional fields for a term of this type."},
   {"*", "id", "Unique unsigned integer identifier for this item"},
   {"*", "term", "A relatively short label, no more than a few words"},
   {"*", "symbol", "A short human and machine readable symbol with just alphanumeric characters."},
   {"*", "tag", "A short human and machine readable symbol with just alphanumeric characters."},
   {"*", "deprecated", "If non-empty, the reason why this entry is obsolete."},
   {"*", "label", "A relatively short label, no more than a few words"},
   {"*", "description", "A description up to a paragraph long of plain text."},
   {"*", "shortLabel", "A one or two word (less than 18 character) label."},
   {"*", "longLabel", "A sentence or two label."},
   {"*", "organism", "Common name of donor organism."},
   {"*", "tissue", "Tissue source of sample."},
   {"*", "vendorName", "Name of vendor selling reagent."},
   {"*", "vendorId", "Catalog number of other way of identifying reagent."},
   {"*", "orderUrl", "Web page to order regent."},
   {"*", "karyotype", "Status of chromosomes in cell - usually either normal or cancer."},
   {"*", "termId", "ID of term in external controlled vocabulary. See also termUrl."},
   {"*", "termUrl", "External URL describing controlled vocabulary."},
   {"*", "color", "Red,green,blue components of color to visualize, each 0-255."},
   {"*", "sex", "M for male, F for female, B for both, U for unknown."},
   {"*", "lab", "Scientific lab producing data."},
   {"*", "lots", "The specific lots of reagent used."},
   {"*", "age", "Age of donor organism."},
   {"*", "strain", "Strain of organism."},
   {"*", "geoPlatformName", "Short description of sequencing platform. Matches term used by GEO."},
};

char *camelCaseToSeparator(char *input, char separator)
/* Convert something like thisIsMyVar to something all lower case with separator characters
 * inserted in front of each formerly capitalized letter.  If separator is '_' it would convert
 * 'thisIsMyVar' to 'this_is_my_var' in a dynamically allocated string.  The first character
 * is always copied unaltered.  Input must be non-empty and shorter than 1024 characters. */
{
int inLen = strlen(input);
assert(inLen > 0 && inLen < 1024);
int maxOutLen = 2*inLen;   /* Worst case: every letter after the first grows to two chars. */
char outBuf[maxOutLen+1];
char c, *in = input, *out = outBuf;

/* Copy first letter unaltered */
*out++ = *in++;

/* Loop through throwing in a separator every time you see a capital letter. */
while ((c = *in++) != 0)
    {
    /* <ctype.h> functions require a value representable as unsigned char. */
    if (isupper((unsigned char)c))
        {
        *out++ = separator;
        c = tolower((unsigned char)c);
        }
    *out++ = c;
    }
*out++ = 0;  // end out string

/* Store output to more permanent memory. */
return cloneString(outBuf);
}

char *searchFieldDescription(char *type, char *field)
/* Search field description table for match to field.  The table is scanned in order, so
 * the first entry whose typePattern wild-matches type and whose fieldName equals field wins.
 * Aborts with an error message if nothing matches. */
{
int i;
for (i=0; i<ArraySize(fieldDescriptions); ++i)
    {
    struct fieldLabel *d = &fieldDescriptions[i];
    if (wildMatch(d->typePattern, type) && sameString(d->fieldName, field))
        return d->description;
    }
errAbort("Can't find description for %s.%s. Please add to fieldDescriptions in source code.",
        type, field);
return NULL;
}

void setValType(struct stanzaField *field)
/* Set valType field based on count, countFloat, countUnsigned, countInt.  A numeric type is
 * chosen only if every observed value fit it, trying unsigned, then int, then float.
 * Otherwise it's a string, or a long string if any value was over 255 chars. */
{
enum sfType valType = sftString;
int count = field->count;
if (count != 0)
    {
    if (count == field->countUnsigned)
        valType = sftUnsigned;
    else if (count == field->countInt)
        valType = sftInt;
    else if (count == field->countFloat)
        valType = sftFloat;
    else if (field->maxSize > 255)
        valType = sftLongString;
    }
field->valType = valType;
}

void ensureGoodSymbol(char *label)
/* Make sure that label is a good symbol for us - starts with lower case, and the rest
 * is just alphanumeric characters or underscores.  Aborts if not. */
{
if (!islower((unsigned char)label[0]))
    errAbort("Label \"%s\" must start with lower case letter.", label);
char c, *s = label;
while ((c = *(++s)) != 0)
    if (!isalnum((unsigned char)c) && c != '_')
        errAbort("Char '%c' is not allowed in label \"%s.\"", c, label);
}

char *fieldLabelToSymbol(char *label)
/* Convert a field label to one we want to use.  Currently this just validates the label
 * and returns it unchanged (same pointer). */
{
ensureGoodSymbol(label);
return label;
}

struct stanzaField *stanzaFieldNew(char *name)
/* Construct a new field tag around given name. */
{
struct stanzaField *field;
AllocVar(field);
field->uniq = hashNew(8);
field->name = cloneString(name);
/* Derive symbol from our own copy so the field doesn't depend on the caller's buffer. */
field->symbol = fieldLabelToSymbol(field->name);
return field;
}

struct stanzaField *stanzaFieldFind(struct stanzaField *list, char *name)
/* Return list element of given name, or NULL if not found. */
{
struct stanzaField *el;
for (el = list; el != NULL; el = el->next)
    if (sameString(name, el->name))
        break;
return el;
}

struct stanzaField *stanzaFieldFindSymbol(struct stanzaField *list, char *symbol)
/* Return list element of given symbol, or NULL if not found. */
{
struct stanzaField *el;
for (el = list; el != NULL; el = el->next)
    if (sameString(symbol, el->symbol))
        break;
return el;
}

char *typeLabelToSymbol(char *label)
/* Convert a label that may have spaces or bad casing to something else.  Rather than being
 * generic this just handles the two known special cases, and does some sanity checks. */
{
if (sameString(label, "Cell Line"))
    return "cellType";
else if (sameString(label, "Antibody"))
    return "antibody";
else
    {
    ensureGoodSymbol(label);
    return label;
    }
}

struct stanzaType *stanzaTypeNew(char *name, struct hash *typeHash)
/* Allocate new stanzaType and save it in hash. */
{
struct stanzaType *type;
AllocVar(type);
hashAddSaveName(typeHash, name, type, &type->name);
/* Use the name saved in type->name rather than the caller's buffer. */
type->symbol = typeLabelToSymbol(type->name);
return type;
}

struct slPair *requiredTag(struct lineFile *lf, struct slPair *stanza, char *tag)
/* Make sure there is a line that begins with tag in the stanza, and return it. */
{
struct slPair *pair = slPairFind(stanza, tag);
if (pair == NULL)
    errAbort("Error in stanza ending line %d of %s: required tag \"%s\" is missing.",
        lf->lineIx, lf->fileName, tag);
return pair;
}

boolean isAllDigits(char *s)
/* Return true if all characters in string are digits.  (An empty string returns TRUE.) */
{
char c;
while ((c = *s++) != 0)
    if (!isdigit((unsigned char)c))
        return FALSE;
return TRUE;
}

boolean isInt(char *s)
/* Return true if it looks like an integer - an optional leading '-' followed by digits. */
{
if (*s == '-')
    ++s;
return isAllDigits(s);
}

boolean isFloat(char *s)
/* Return TRUE if it looks like floating point (or int).  Resorts to strtod because float
 * formatting is complex.  The string counts as a float if strtod consumes all of it. */
{
char *end;
strtod(s, &end);
return *end == '\0';
}

struct slPair *nextStanza(struct lineFile *lf)
/* Return next stanza with a little extra error checking.  Specifically make sure
 * no tags without values, no duplicate tags, and no tabs inside of values.
 * Returns NULL at end of file. */
{
struct slPair *stanza = raNextStanzAsPairs(lf);
if (stanza != NULL)
    {
    struct hash *uniq = hashNew(5);
    struct slPair *pair;
    for (pair = stanza; pair != NULL; pair = pair->next)
        {
        if (hashLookup(uniq, pair->name))
            errAbort("Duplicate \"%s\" tag in stanza ending line %d of %s",
                pair->name, lf->lineIx, lf->fileName);
        else
            hashAdd(uniq, pair->name, NULL);
        char *val = pair->val;
        if (val == NULL)
            errAbort("Tag \"%s\" without value in stanza ending line %d of %s",
                pair->name, lf->lineIx, lf->fileName);
        /* Tabs will also mess us up, since output is tab-separated. */
        if (strchr(val, '\t'))
            errAbort("Internal tab in tag \"%s\" in stanza ending line %d of %s",
                pair->name, lf->lineIx, lf->fileName);
        }
    hashFree(&uniq);
    }
return stanza;
}

void stanzaTypesFromCvRa(char *inCvRa, struct stanzaType **retTypeList, struct hash **retHash,
        struct hash **retTypeOfTermHash)
/* Read cv.ra file into a list of stanzaTypes.  Return list and also a hash of same items
 * keyed by type name.  Also returns in retTypeOfTermHash a hash keyed by term, with
 * stanza (as slPair list) values, for the stanzas of type "typeOfTerm". */
{
/* Stream through input and build up hash and list of stanzaTypes */
struct lineFile *lf = lineFileOpen(inCvRa, TRUE);
struct hash *typeHash = hashNew(10);
struct hash *typeOfTermHash = hashNew(10);
struct stanzaType *typeList = NULL;
struct slPair *stanza;
while ((stanza = nextStanza(lf)) != NULL)
    {
    /* Get type field and enforce two other fields exist */
    struct slPair *typePair = requiredTag(lf, stanza, "type");
    struct slPair *termPair = requiredTag(lf, stanza, "term");
    requiredTag(lf, stanza, "tag");

    /* Get existing stanzaType structure or if need be make a new one and add to hash */
    char *typeName = typePair->val;
    struct stanzaType *type = hashFindVal(typeHash, typeName);
    if (type == NULL)
        {
        type = stanzaTypeNew(typeName, typeHash);
        slAddTail(&typeList, type);
        }
    type->count += 1;

    /* Go through lines of stanza and make sure they are all fields in type */
    struct slPair *line;
    for (line = stanza; line != NULL; line = line->next)
        {
        /* Get tag and its value. */
        char *tag = line->name;
        char *val = line->val;

        /* Get field that corresponds with tag, making a new one if need be. */
        struct stanzaField *field = stanzaFieldFind(type->fieldList, tag);
        if (field == NULL)
            {
            field = stanzaFieldNew(tag);
            slAddTail(&type->fieldList, field);
            }

        /* Update counts, which later we'll use to figure out field type.  The checks
         * nest: only float-compatible values are tested as int, only ints as unsigned. */
        field->count += 1;
        if (isFloat(val))
            {
            field->countFloat += 1;
            if (isInt(val))
                {
                field->countInt += 1;
                if (isAllDigits(val))
                    field->countUnsigned += 1;
                }
            }
        int valSize = strlen(val);
        if (valSize > field->maxSize)
            field->maxSize = valSize;

        /* Update fields uniq hash to include this val of field. */
        hashIncInt(field->uniq, val);
        }

    /* Add stanza as pairs to typeOfTerm hash */
    if (sameString(typeName, "typeOfTerm"))
        {
        requiredTag(lf, stanza, "description");
        hashAdd(typeOfTermHash, termPair->val, stanza);
        }
    }

/* Set valType for fields */
struct stanzaType *type;
for (type = typeList; type != NULL; type = type->next)
    {
    struct stanzaField *field;
    for (field = type->fieldList; field != NULL; field = field->next)
        setValType(field);
    }

/* Clean up and go home. */
lineFileClose(&lf);
*retTypeList = typeList;
*retHash = typeHash;
*retTypeOfTermHash = typeOfTermHash;
}

/******* Routines for output in various formats. *********/

void stanzaTypesToStats(struct stanzaType *typeList, char *fileName)
/* Write out typeList to file in a format that expresses stats. */
{
FILE *f = mustOpen(fileName, "w");
struct stanzaType *type;
for (type = typeList; type != NULL; type = type->next)
    {
    fprintf(f, "%d %s\n", type->count, type->name);
    struct stanzaField *field;
    for (field = type->fieldList; field != NULL; field = field->next)
        {
        fprintf(f, " %s count %d, unique %d, float %d, int %d, unsigned %d\n",
                field->name, field->count, field->uniq->elCount,
                field->countFloat, field->countInt, field->countUnsigned);
        }
    }
carefulClose(&f);
}

void stanzaTypeToTree(struct stanzaType *typeList, char *fileName)
/* Write out typeList to file in aTree format that autoDtd also uses.
 * A sample might be:
 *   person
 *       $name
 *       #yearsOld
 *       %metersTall
 *       $favoriteColor?
 * Where "person" corresponds to the stanza type, and this is followed by the
 * stanza fields indented.  Before each field is a single character indicating
 * the type of field:  $ for string, # for integer, and % for real number.
 * After the field a ? indicates the field is optional.  The "type" field itself
 * is skipped. */
{
FILE *f = mustOpen(fileName, "w");
struct stanzaType *type;
for (type = typeList; type != NULL; type = type->next)
    {
    fprintf(f, "%s\n", type->symbol);
    struct stanzaField *field;
    for (field = type->fieldList; field != NULL; field = field->next)
        {
        if (sameString(field->symbol, "type"))
            continue;
        spaceOut(f, 4);
        switch (field->valType)
            {
            case sftString:
            case sftLongString:
                fputc('$', f);
                break;
            case sftUnsigned:
            case sftInt:
                fputc('#', f);
                break;
            case sftFloat:
                fputc('%', f);
                break;
            default:
                internalErr();
                break;
            }
        fputs(field->symbol, f);
        /* A field not seen in every stanza of the type is optional. */
        if (field->count != type->count)
            fputc('?', f);
        fputc('\n', f);
        }
    }
carefulClose(&f);
}

void openTypeFiles(struct stanzaType *typeList, char *outDir, char *suffix)
/* Open a file (to write) for each element of typeList.  The file will be opened
 * in the directory and with the suffix indicated in the parameters.  The file name
 * is tablePrefix followed by the type's symbol. */
{
struct stanzaType *type;
for (type = typeList; type != NULL; type = type->next)
    {
    char path[PATH_LEN];
    safef(path, sizeof(path), "%s/%s%s%s", outDir, tablePrefix, type->symbol, suffix);
    type->f = mustOpen(path, "w");
    }
}

void closeTypeFiles(struct stanzaType *typeList)
/* Close all open files in typeList. */
{
struct stanzaType *type;
for (type = typeList; type != NULL; type = type->next)
    {
    carefulClose(&type->f);
    }
}

void outputAccordingToType(struct hash *ra, struct stanzaType *type, FILE *f)
/* Output data in ra as tab-separated according to specs in type.  The first field of
 * type must be "id"; its value is generated here by incrementing type->lastId. */
{
/* Output ID which we generate here, and skip over first field since it is output */
fprintf(f, "%d", ++type->lastId);
struct stanzaField *field = type->fieldList;
assert(field != NULL && sameString(field->name, "id"));
field = field->next;

/* Loop through rest. */
for (; field != NULL; field = field->next)
    {
    fputc('\t', f);
    /* Just a simple ra field lookup, with missing fields output as empty. */
    char *val = hashFindVal(ra, field->name);
    val = emptyForNull(val);
    fputs(val, f);
    }
fputc('\n', f);
}

void outputTabs(struct stanzaType *typeList, struct hash *typeHash,
        char *inFile, char *outDir)
/* Stream through inFile, reformatting it a bit into tab-separated files in outDir,
 * one file per type. */
{
/* Open inputs and output. */
struct lineFile *lf = lineFileOpen(inFile, TRUE);
openTypeFiles(typeList, outDir, ".tab");

/* Stream through stanzas and dispatch according to type */
struct hash *ra;
while ((ra = raNextStanza(lf)) != NULL)
    {
    char *typeName = hashMustFindVal(ra, "type");
    struct stanzaType *type = hashMustFindVal(typeHash, typeName);
    outputAccordingToType(ra, type, type->f);
    struct stanzaType *split;
    for (split = type->splitSuccessor; split != NULL; split = split->splitSuccessor)
        {
        outputAccordingToType(ra, split, split->f);
        }
    hashFree(&ra);
    }

/* Close all outputs and input. */
closeTypeFiles(typeList);
lineFileClose(&lf);
}

char *nameInTypeOfTerms(char *name)
/* Do a little symbol shuffling on "Cell Line" exception */
{
if (sameString(name, "Cell Line"))
    return "cellType";
else
    return name;
}

char *getTypeDescription(struct stanzaType *type, struct hash *typeOfTermHash)
/* Try and make up description of type, basically looking it up in the hash.
 * Returns "NoTypeDescription" if there is none to be found. */
{
char *totKey = nameInTypeOfTerms(type->name);  /* key in typeOfTerm hash */
char *typeDescription = "NoTypeDescription";
struct slPair *typeOfTerm = hashFindVal(typeOfTermHash, totKey);
if (typeOfTerm != NULL)
    {
    struct slPair *description = slPairFind(typeOfTerm, "description");
    /* Guard against a stanza that lacks a description rather than crashing. */
    if (description != NULL && description->val != NULL)
        typeDescription = description->val;
    }
return typeDescription;
}

void outputAutoSql(struct stanzaType *typeList, struct hash *typeOfTermHash, char *outFile)
/* Output autoSql table definitions for all types in typeList to outFile. */
{
FILE *f = mustOpen(outFile, "w");
struct stanzaType *type;
for (type = typeList; type != NULL; type = type->next)
    {
    char *typeDescription = getTypeDescription(type, typeOfTermHash);
    char *indent = " ";
    fprintf(f, "table %s\n", type->symbol);
    typeDescription = makeEscapedString(typeDescription, '"');
    fprintf(f, "\"%s\"\n", typeDescription);
    fprintf(f, "%s(\n", indent);
    struct stanzaField *field;
    for (field = type->fieldList; field != NULL; field = field->next)
        {
        fputs(indent, f);
        switch (field->valType)
            {
            case sftString:
                fputs("string", f);
                break;
            case sftUnsigned:
                fputs("uint", f);
                break;
            case sftInt:
                fputs("int", f);
                break;
            case sftFloat:
                fputs("float", f);
                break;
            case sftLongString:
                fputs("lstring", f);
                break;
            default:
                internalErr();
                break;
            }
        fprintf(f, " %s;\t", field->symbol);
        fputc('"', f);
        char *description = searchFieldDescription(type->symbol, field->symbol);
        if (description == NULL)
            description = "noDescriptionDefined";
        fputs(description, f);
        fputc('"', f);
        fputc('\n', f);
        }
    fprintf(f, "%s)\n", indent);
    fprintf(f, "\n");
    }
carefulClose(&f);
}

void outputSql(struct stanzaType *typeList, struct hash *typeOfTermHash, char *outFile)
/* Output sql commands to create tables.  Don't populate tables here, will do that
 * with tab-separated-files. */
{
FILE *f = mustOpen(outFile, "w");
struct stanzaType *type;
for (type = typeList; type != NULL; type = type->next)
    {
    char *typeDescription = getTypeDescription(type, typeOfTermHash);
    fprintf(f, "# %s\n", typeDescription);
    char *indent = " ";
    fprintf(f, "CREATE TABLE %s%s (\n", tablePrefix, type->symbol);
    struct stanzaField *field;
    for (field = type->fieldList; field != NULL; field = field->next)
        {
        fprintf(f, "%s%s ", indent, field->symbol);
        switch (field->valType)
            {
            case sftString:
                fputs("VARCHAR(255)", f);
                break;
            case sftUnsigned:
                fputs("INT UNSIGNED", f);
                break;
            case sftInt:
                fputs("INT", f);
                break;
            case sftFloat:
                fputs("FLOAT", f);
                break;
            case sftLongString:
                fputs("LONGBLOB", f);
                break;
            default:
                internalErr();
                break;
            }
        /* Fields present in every stanza of the type are declared NOT NULL. */
        boolean isOptional = (field->count != type->count);
        if (!isOptional)
            fprintf(f, " NOT NULL");
        if (sameString(field->name, "id"))
            fprintf(f, " AUTO_INCREMENT");
        char *description = searchFieldDescription(type->symbol, field->symbol);
        if (description == NULL)
            description = "noDescriptionDefined";
        fprintf(f, ",\t# %s\n", description);
        }
    fprintf(f, "\t\t# Indices\n");
    fprintf(f, "%sPRIMARY KEY(id)\n", indent);
    fprintf(f, ");\n\n");
    }
carefulClose(&f);
}

char *djangoClassName(struct stanzaType *type)
/* Return type->symbol with initial letter capitalized, in a newly allocated string. */
{
char *className = cloneString(type->symbol);
className[0] = toupper((unsigned char)className[0]);
return className;
}

void outputDjangoModels(struct stanzaType *typeList, struct hash *typeOfTermHash, char *outFile)
/* Output Django models.py */
{
/* Open file and write header */
FILE *f = mustOpen(outFile, "w");
fprintf(f, "# %s was originally generated by the cvToSql program\n", outFile);
fprintf(f, "\n");
fprintf(f, "from django.db import models\n");
fprintf(f, "\n");

/* Write a django model for each type */
struct stanzaType *type;
for (type = typeList; type != NULL; type = type->next)
    {
    char *indent = " ";
    char *typeSymbol = djangoClassName(type);
    char *typeDescription = getTypeDescription(type, typeOfTermHash);
    fprintf(f, "class %s(models.Model):\n", typeSymbol);
    fprintf(f, "%s\"\"\"%s\"\"\"\n", indent, typeDescription);
    struct stanzaField *field;
    for (field = type->fieldList; field != NULL; field = field->next)
        {
        /* No explicit id field is written to the model. */
        if (sameString(field->name, "id"))
            continue;
        fprintf(f, "%s%s = models.", indent, field->symbol);
        char *fieldLabel = camelCaseToSeparator(field->name, ' ');
        switch (field->valType)
            {
            case sftString:
                fprintf(f, "CharField(\"%s\", max_length=255", fieldLabel);
                break;
            case sftUnsigned:
                fprintf(f, "PositiveIntegerField(\"%s\"", fieldLabel);
                break;
            case sftInt:
                fprintf(f, "IntegerField(\"%s\"", fieldLabel);
                break;
            case sftFloat:
                fprintf(f, "FloatField(\"%s\"", fieldLabel);
                break;
            case sftLongString:
                fprintf(f, "TextField(\"%s\"", fieldLabel);
                break;
            default:
                internalErr();
                break;
            }
        freez(&fieldLabel);
        boolean isOptional = (field->count != type->count);
        if (isOptional)
            fprintf(f, ", blank=True");
        fprintf(f, ")\n");
        char *description = searchFieldDescription(type->symbol, field->symbol);
        if (description == NULL)
            description = "noDescriptionDefined";
        fprintf(f, "\t# %s\n", description);
        }
    fprintf(f, "\n");

    /* Add Meta subclass for extra info including table name that matches C expectations. */
    fprintf(f, "%sclass Meta:\n", indent);
    fprintf(f, "%s%sdb_table = '%s%s'\n", indent, indent, tablePrefix, type->symbol);
    fprintf(f, "\n");

    /* Print unicode method */
    fprintf(f, "%sdef __unicode__(self):\n", indent);
    fprintf(f, "%s%sreturn self.term\n", indent, indent);
    fprintf(f, "\n");
    fprintf(f, "\n");
    }
carefulClose(&f);
}

void outputDjangoAdmin(struct stanzaType *typeList, struct hash *typeOfTermHash, char *outFile)
/* Write Django admin.py, registering a model class for each type. */
{
/* Open file and write header */
FILE *f = mustOpen(outFile, "w");
fprintf(f, "# %s was originally generated by the cvToSql program\n", outFile);
fprintf(f, "\n");
fprintf(f, "from django.contrib import admin\n");
fprintf(f, "from models import *\n");
fprintf(f, "\n");

/* Write out each type. */
struct stanzaType *type;
for (type = typeList; type != NULL; type = type->next)
    fprintf(f, "admin.site.register(%s)\n", djangoClassName(type));

/* Clean up and go home. */
carefulClose(&f);
}

void outputDjango(struct stanzaType *typeList, struct hash *typeOfTermHash, char *outDir)
/* Output directory of little Python scripts for Django. */
{
/* Make output directory and any directories needed to contain it. */
makeDirsOnPath(outDir);

/* Output models file. */
char path[PATH_LEN];
safef(path, sizeof(path), "%s/%s", outDir, "models.py");
outputDjangoModels(typeList, typeOfTermHash, path);

/* Output admin file. */
safef(path, sizeof(path), "%s/%s", outDir, "admin.py");
outputDjangoAdmin(typeList, typeOfTermHash, path);
}

/******* Routines for rearranging stanza types. *********/

struct stanzaField *removeMatchingSymbol(struct stanzaField **pFieldList, char *symbol)
/* Remove field from *pFieldList where field->symbol is same as symbol parameter.  Return
 * removed field, or NULL if no such field.  Order of remaining fields is preserved. */
{
struct stanzaField *newList = NULL, *field, *nextField;
struct stanzaField *result = NULL;
for (field = *pFieldList; field != NULL; field = nextField)
    {
    nextField = field->next;
    if (sameString(field->symbol, symbol))
        result = field;
    else
        slAddHead(&newList, field);
    }
slReverse(&newList);
*pFieldList = newList;
return result;
}

void reorderFields(struct stanzaType *type, char *firstFields[], int firstFieldsCount)
/* Reorder fields in stanza so that the firstFields are in front and in order if they exist.
 * Remaining fields are after the first fields, and in their original order. */
{
struct stanzaField *startList = NULL, *endList = type->fieldList;
int i;
for (i=0; i<firstFieldsCount; ++i)
    {
    /* Pull each priority field out of the remainder and append it to the front list. */
    struct stanzaField *field = removeMatchingSymbol(&endList, firstFields[i]);
    if (field != NULL)
        slAddTail(&startList, field);
    }
type->fieldList = slCat(startList, endList);
}

char *fieldPriority[] = {
/* List of order we'd like initial fields in */
    "id",
    "term",
    "tag",
    "deprecated",
    "description",
};

struct stanzaField *stanzaFieldFake(char *name, struct stanzaType *parent, boolean isOptional)
/* Make up a fake field of given characteristics.  Will be interpreted as a string field.
 * An optional field gets a count below the parent's, so output routines that compare
 * field->count with type->count treat it as optional. */
{
struct stanzaField *field = stanzaFieldNew(name);
if (isOptional)
    field->count = parent->count/2;
else
    field->count = parent->count;
return field;
}

void addDeprecatedField(struct stanzaType *type)
/* Add deprecated field if it doesn't exist already */
{
if (!stanzaFieldFind(type->fieldList, "deprecated"))
    {
    struct stanzaField *field = stanzaFieldFake("deprecated", type, TRUE);
    slAddHead(&type->fieldList, field);
    }
}

void addIdField(struct stanzaType *type)
/* Add id field, and insure it doesn't already exist */
{
if (stanzaFieldFind(type->fieldList, "id"))
    errAbort("Type %s already has an id field", type->name);
struct stanzaField *field = stanzaFieldFake("id", type, FALSE);
/* Make all the numeric counts agree with count so setValType picks unsigned. */
field->countFloat = field->countInt = field->countUnsigned = field->count;
setValType(field);
slAddHead(&type->fieldList, field);
}

void removeField(struct stanzaType *type, char *fieldName)
/* Remove field whose symbol is fieldName from type, if it is there. */
{
struct stanzaField *field = stanzaFieldFindSymbol(type->fieldList, fieldName);
if (field != NULL)
    slRemoveEl(&type->fieldList, field);
}

void encodeCvToDb(char *inCvRa, char *outStats, char *outTree, char *outAs, char *outSql,
        char *outDjango, char *outDir)
/* encodeCvToDb - Make a relational database version of cv.ra. */
{
/* Read input into type list and hash */
struct hash *typeHash;        /* stanzaType valued, keyed by type->name */
struct hash *typeOfTermHash;  /* list of slPair valued, keyed by "term" tag */
struct stanzaType *typeList;
stanzaTypesFromCvRa(inCvRa, &typeList, &typeHash, &typeOfTermHash);
verbose(1, "%s contains %d types\n", inCvRa, typeHash->elCount);

/* Output stats format before we start munging the tree. */
stanzaTypesToStats(typeList, outStats);

/* Rearrange tree to be more as we'd like database. */
struct stanzaType *type;
for (type = typeList; type != NULL; type = type->next)
    {
    // Grant is a reserved word in SQL so we have to change it
    if (sameString(type->name, "grant"))
        type->symbol = "grantee";
    addDeprecatedField(type);
    addIdField(type);
    reorderFields(type, fieldPriority, ArraySize(fieldPriority));
    removeField(type, "type");
    }

/* Output type tree and actual data */
stanzaTypeToTree(typeList, outTree);
makeDirsOnPath(outDir);
outputAutoSql(typeList, typeOfTermHash, outAs);
outputSql(typeList, typeOfTermHash, outSql);
outputDjango(typeList, typeOfTermHash, outDjango);
outputTabs(typeList, typeHash, inCvRa, outDir);
}

int main(int argc, char *argv[])
/* Process command line. */
{
optionInit(&argc, argv, options);
if (argc != 8)
    usage();
tablePrefix = optionVal("tablePrefix", tablePrefix);
encodeCvToDb(argv[1], argv[2], argv[3], argv[4], argv[5], argv[6], argv[7]);
return 0;
}