/* Routines for processing MIME from a descriptor.
 * For cgi post, the MIME descriptor is stdin.
 * We want to parse it as it comes in so that
 * we can handle very large files if needed.
 * Large data are saved to tempfiles.
 * Small data stays as ptr+size in memory.
 *
 * This file is copyright 2005 Jim Kent, but license is hereby
 * granted for all use - public, private or commercial. */

#include "common.h"
#include "hash.h"
#include "linefile.h"
#include "cheapcgi.h"
#include "portable.h"
#include "errabort.h"
#include "mime.h"

/*
 * Note: MIME is a nested structure that makes a tree that streams in depth-first.
 */

/* The size limits below are parenthesized so that they expand safely
 * inside larger expressions. */

#define MAXPARTSIZE (64LL*1024*1024*1024)
/* max size before gets put in a tempfile to save memory.
 * It currently has been set so large that it will not be triggered. */

#define MAXPARTLINESIZE 1024
/* header lines should be small, so bad if bigger than this */

#define MAXDATASIZE (64LL*1024*1024*1024)
/* max size allowable for large uploads */

#define MAXBOUNDARY (72+5)
/* max size of buffer for boundary 72+--""0 */

enum nlType nlType = nlt_undet;
/* Newline convention of the input.  Starts undetermined; set by getLineMB
 * from the first line terminator it sees, or by readPartHeaderMB from the
 * lineFile when an alternate header string is used. */

static void setEopMB(struct mimeBuf *b)
/* Search the unread part of the buffer (b->i up to b->eoi) for the current
 * boundary and set b->eop (End Of Part) to where it was found.
 * b->eop is NULL when there is no boundary or it does not occur. */
{
if (b->blen > 0)
    b->eop = memMatch(b->boundary, b->blen, b->i, b->eoi - b->i);
else
    b->eop = NULL;
}

static void setEodMB(struct mimeBuf *b)
/* Set b->eod (end of data).  When the buffer is full (eoi == eom) and the
 * boundary is longer than one char, the last blen-1 bytes are held back,
 * since a boundary might be split across this buffer and the next read.
 * Otherwise eod is simply eoi. */
{
if (b->blen > 1 && b->eoi == b->eom)
    b->eod = b->eoi - (b->blen - 1);
else
    b->eod = b->eoi;
}

static void setBoundaryMB(struct mimeBuf *b, char *boundary)
/* Install boundary (may be NULL for none) in b and recompute eop and eod.
 * The string is not copied; b keeps the pointer that was passed in. */
{
b->boundary = boundary;
b->blen = boundary ? strlen(b->boundary) : 0;
setEopMB(b);
setEodMB(b);
}

#ifdef DEBUG
static void dumpMB(struct mimeBuf *b)
/* Debug dump of the buffer pointers, boundary and buffer contents to stderr. */
{
int i = 0;
fprintf(stderr,"b->i  =%lu "
               "b->eop=%lu "
               "b->eod=%lu "
               "b->eoi=%lu "
               "b->eom=%lu "
               "%s "
               "%d "
               "\n",
    (unsigned long) b->i,
    (unsigned long) b->eop,
    (unsigned long) b->eod,
    (unsigned long) b->eoi,
    (unsigned long) b->eom,
    b->boundary ? b->boundary : "(null)",   /* boundary may legitimately be NULL */
    b->blen
    );
fprintf(stderr,"*");
/* NOTE(review): the loop header and the start of the fprintf call were
 * garbled in the text this was restored from; looping over MIMEBUFSIZE
 * bytes is a reconstruction - confirm against the upstream file. */
for (i = 0; i < MIMEBUFSIZE; ++i)
    {
    fprintf(stderr,"%c", (b->buf[i] < 31 || (unsigned) b->buf[i] > 127) ? '.' : b->buf[i] );
    }
fprintf(stderr,"\n\n");
}
#endif

static void moreMimeBuf(struct mimeBuf *b)
/* Refill the buffer from descriptor b->d.  With a boundary longer than one
 * char the unread bytes (b->i up to b->eoi) are first moved to the start of
 * the buffer; otherwise the buffer is treated as empty.  Reads until the
 * buffer is full or read() reports end of input, then recomputes eop/eod.
 * Aborts on a read error. */
{
int bytesRead = 0, bytesToRead = 0;
if (b->blen > 1)
    {
    int r = b->eoi - b->i;   /* bytes still unread */
    memmove(b->buf, b->i, r);
    b->eoi = b->buf + r;
    }
else
    {
    b->eoi = b->buf;
    }
b->i = b->buf + 0;
bytesToRead = b->eom - b->eoi;
while (bytesToRead > 0)
    {
    bytesRead = read(b->d, b->eoi, bytesToRead);
    if (bytesRead < 0)
        errnoAbort("moreMimeBuf: error reading MIME input descriptor");
    b->eoi += bytesRead;
    if (bytesRead == 0)   /* end of input */
        break;
    bytesToRead = bytesToRead - bytesRead;
    }
setEopMB(b);
setEodMB(b);
}

static char getcMB(struct mimeBuf *b)
/* Read just one char from MIME buffer, refilling the buffer when the
 * usable data in a full buffer is used up.  Aborts when asked to read past
 * the end of the input. */
{
if (b->i >= b->eoi && b->eoi < b->eom)  /* at end of input */
    errAbort("getcMB error - requested input beyond end of MIME input.");
if (b->i >= b->eod && b->eoi == b->eom) /* at end of buffer */
    {
    moreMimeBuf(b);
    /* The refill can come back empty (input ended exactly at a buffer
     * boundary).  Without this check a stale byte past eoi was returned. */
    if (b->i >= b->eoi)
        errAbort("getcMB error - requested input beyond end of MIME input.");
    }
return *b->i++;
}

static void putBackMB(struct mimeBuf *b)
/* Rewind just one char back in MIME buffer.
 * Do not use except for distinguishing line type initially.
 * Aborts if already at the start of the buffer. */
{
if (b->i == b->buf) /* at beginning of buffer */
    errAbort("putBackMB error - requested pushback beyond beginning buffer.");
b->i--;
}

static char *getLineMB(struct mimeBuf *b)
/* Reads one line up to CRLF, returned string does not include CRLF however.
 * A lone CR or a lone LF also ends the line.  The first terminator seen
 * fixes the global nlType.  Aborts if the line reaches MAXPARTLINESIZE chars.
 * Use freeMem when done with string. */
{
char line[MAXPARTLINESIZE];
int i = 0;
char c = 0;
line[0] = 0;
while (TRUE)
    {
    c = getcMB(b);
    if ((c == 0x0d) || (c == 0x0a))  /* CR or LF is end of line */
        break;
    line[i++] = c;
    if (i >= MAXPARTLINESIZE)
        errAbort("getLineMB error - MIME input header too long, "
                 "greater than %d chars",MAXPARTLINESIZE);
    }
line[i] = 0; /* terminate string */
if (nlType == nlt_undet)  /* determine newline type */
    {
    if (c == 0x0d)
        {
        nlType = nlt_mac;
        c = getcMB(b);
        if (c == 0x0a)
            nlType = nlt_dos;
        else
            putBackMB(b);   /* not LF: it belongs to the next line */
        }
    else
        {
        nlType = nlt_unix;
        }
    }
else if (nlType == nlt_dos)
    {
    if (c == 0x0d)
        getcMB(b);  /* just waste the LF */
    else
        nlType = nlt_unix;   /* bare LF seen: switch to unix newlines */
    }
return cloneString(line);
}

static void getChunkMB(struct mimeBuf *b, char **address, int *size, boolean *hasZeros)
/* Pass back address and size of chunk, and whether it contains embedded zeros.
 * The chunk is the largest piece of data left in the buffer up to the eod or eop.
 * The chunk is marked as consumed by advancing b->i to its end.
 * The address points into b's own buffer, so it is only good until the
 * next refill. */
{
char *eoc = b->eop ? b->eop : b->eod;  /* end of chunk */
*address = b->i;
*size = eoc - b->i;
/* searching for the 1-byte pattern "" finds any zero byte in the chunk */
*hasZeros = (memMatch("", 1, *address, *size) != NULL);
b->i = eoc;
}

static void readPartHeaderMB(struct mimeBuf *b, struct mimePart *p, char *altHeader)
/* Reads the header lines of the mimePart, saves the header settings in a hash.
 * Lines come from altHeader when it is non-NULL, otherwise from b.
 * Leading blank lines are skipped; a line starting with whitespace is
 * joined onto the line before it; a blank line ends the header.
 * Keys are trimmed and lower-cased; values are trimmed and cloned into
 * p->hdr.  Aborts if a header line has no colon. */
{
struct dyString *fullLine = dyStringNew(0);
char *key=NULL, *val=NULL;
struct lineFile *lf = NULL;
char *line = NULL;
char *lineAhead = NULL;
int size = 0;
p->hdr = newHash(3);
if (altHeader)
    {
    lf = lineFileOnString("MIME Header", TRUE, altHeader);
    }
/* read ahead one line, skipping any leading blanks lines */
do
    {
    if (altHeader)
        {
        /* NOTE(review): assumes lineFileNext returns FALSE once no line is
         * left - confirm against linefile.h.  The result used to be ignored,
         * leaving lineAhead NULL or stale. */
        if (!lineFileNext(lf, &lineAhead, &size))
            errAbort("readPartHeaderMB error - no header lines found in supplied MIME header");
        }
    else
        {
        freez(&lineAhead);   /* free the blank line being skipped, if any */
        lineAhead = getLineMB(b);
        }
    }
while (sameString(lineAhead,""));
do
    {
    /* accumulate a full header line - some emailers split into mpl lines */
    dyStringClear(fullLine);
    do
        {
        line = lineAhead;
        if (altHeader)
            {
            /* Treat running out of lines like the terminating blank line,
             * so a header string without one still ends the loop. */
            if (!lineFileNext(lf, &lineAhead, &size))
                lineAhead = "";
            }
        else
            lineAhead = getLineMB(b);
        dyStringAppend(fullLine,line);
        if (!altHeader)
            freez(&line);   /* getLineMB lines are clones that we own */
        }
    while (isspace((unsigned char)lineAhead[0]));
    line = fullLine->string;
    key = line;
    val = strchr(line,':');
    if (!val)
        errAbort("readPartHeaderMB error - header-line colon not found, line=[%s]",line);
    *val = 0;   /* split in place into key and value */
    val++;
    key=trimSpaces(key);
    // since the hash is case-sensitive, convert to lower case for ease of matching
    tolowers(key);
    val=trimSpaces(val);
    hashAdd(p->hdr,key,cloneString(val));
    }
while (!sameString(lineAhead,""));
if (altHeader)
    {
    if (nlType == nlt_undet)
        nlType = lf->nlType;
    lineFileClose(&lf);
    }
else
    {
    freez(&lineAhead);
    }
dyStringFree(&fullLine);
}

struct mimeBuf * initMimeBuf(int d)
/* d is a descriptor for a file or socket or some other descriptor
 * that the MIME input can be read from.
 * Initializes the mimeBuf structure with no boundary and does a first
 * read to fill the buffer. */
{
struct mimeBuf *b=AllocA(*b);
b->d = d;
b->boundary = NULL;
b->blen = 0;
b->eom = b->buf+MIMEBUFSIZE;
/* start out looking like a fully consumed buffer so moreMimeBuf fills it */
b->eoi = b->eom;
b->eod = b->eom;
b->i = b->eom;
moreMimeBuf(b);
return b;
}

char *getMimeHeaderMainVal(char *header)
/* Parse a typical mime header line returning the first
 * main value up to whitespace, punctuation, or end.
 * Aborts if the value does not fit in the 1024-byte local buffer.
 * freeMem the returned string when done */
{
char value[1024];
char *h = header;
int i = 0;
char *puncChars = ",;: \t\r\n"; /* punctuation chars */
/* The header should have already been trimmed of leading and trailing spaces */
while (TRUE)
    {
    char c = *h++;
    if (c==0 || strchr(puncChars,c))
        break;
    value[i++] = c;
    if (i >= (int)sizeof(value))
        errAbort("error: main value too long (>%lu) in MIME header Content-type:%s",(unsigned long)sizeof(value),header);
    }
value[i] = 0;
return cloneString(value);
}

char *getMimeHeaderFieldVal(char *header, char *field)
/* Parse a typical mime header line looking for field=
 * and return the value which may be quoted.
 * The field name only matches when it comes right after a punctuation char
 * and right before '=', so it never matches at the very start of header.
 * Returns NULL if the field is not found.
 * Aborts if the value does not fit in the 1024-byte local buffer.
 * freeMem the returned string when done */
{
char value[1024];
char *fld = header;
int i = 0;
char *puncChars = ",;: \t\r\n"; /* punctuation chars */
while (TRUE)
    {
    fld = strstr(fld,field);
    if (!fld)
        return NULL;
    if (fld > header && strchr(puncChars,fld[-1]))
        {
        fld += strlen(field);
        if (*fld == '=')
            {
            ++fld;
            break;
            }
        }
    else
        {
        ++fld;   /* matched inside a longer word: keep searching */
        }
    }
if (*fld == '"')
    {
    puncChars = "\"";  /* quoted: value runs to the closing quote */
    ++fld;
    }
i = 0;
while (TRUE)
    {
    char c = *fld++;
    if (c==0 || strchr(puncChars,c))
        break;
    value[i++] = c;
    if (i >= (int)sizeof(value))
        errAbort("error: %s= value too long (>%lu) in MIME header Content-type:%s",field,(unsigned long)sizeof(value),header);
    }
value[i] = 0;
return cloneString(value);
}

char *getNewLineByType()
/* Return the newline string for the global nlType setting:
 * CR LF for dos, CR for mac, LF for unix and anything else.
 * The result is a string constant; do not free it. */
{
switch (nlType)
    {
    case nlt_dos:
        return "\x0d\x0a";
    case nlt_mac:
        return "\x0d";
    case nlt_unix:
    default:
        return "\x0a";
    }
}

struct mimePart *parseMultiParts(struct mimeBuf *b, char *altHeader)
/* This is a recursive function.  It parses multipart MIME messages.
 * Data that are binary or too large will be saved in mimePart->filename
 * otherwise saved as a c-string in mimePart->data.  If multipart,
 * then first child is mimePart->child, subsequent sibs are in child->next.
 * altHeader is a string of headers that can be fed in if the headers have
 * already been read off the stream by an earlier process, i.e. apache.
 *
 * As implemented here: child parts are collected on p->multi; a plain part
 * gets p->size, p->binary (if a zero byte was seen) and either p->data
 * (zero-terminated copy) or p->fileName (tempfile).
 * The special altHeader value "autoBoundary" means there is no header at
 * all: the first input line starting with "--" is taken as the boundary.
 * Aborts on malformed input. */
{
struct mimePart *p=AllocA(*p);
char *parentboundary = NULL, *boundary = NULL;
char *ct = NULL;
boolean autoBoundary = FALSE;

if (sameOk(altHeader, "autoBoundary"))
    {
    /* process things with no explicit header.
     * look for the first line that starts with "--".
     * (Stricter checks on the two preceding lines are disabled; the
     * previous lines are still tracked.) */
    struct dyString *dy = dyStringNew(0);
    char *prevPrevLine = NULL;
    char *prevLine = NULL;
    char *line = NULL;
    boolean found = FALSE;
    autoBoundary = TRUE;
    while (TRUE)
        {
        if (b->i >= b->eoi && b->eoi < b->eom)  /* at end of input */
            break;
        line = getLineMB(b);
        if (line && startsWith("--",line))
            {
            found = TRUE;
            break;
            }
        freez(&prevPrevLine);
        prevPrevLine = prevLine;
        prevLine = line;
        if (prevPrevLine)
            touppers(prevPrevLine);
        }
    if (!found)
        errAbort("autoBoundary: No initial boundary found.");
    /* build a stand-in header naming the boundary that was found */
    dyStringPrintf(dy, "CONTENT-TYPE:multipart/form-data; boundary=%s%s%s",
        line+2, getNewLineByType(), getNewLineByType() );
    altHeader = dyStringCannibalize(&dy);
    freez(&prevPrevLine);
    freez(&prevLine);
    freez(&line);
    }

readPartHeaderMB(b,p,altHeader);

ct = hashFindVal(p->hdr,"content-type");  /* use lowercase key */

if (ct && startsWith("multipart/",ct))
    {
    char bound[MAXBOUNDARY];
    struct mimePart *child = NULL;
    int boundaryLen = 0;

    /* these 3 vars just for processing epilog chunk: */
    char *bp=NULL;
    int size=0;
    boolean hasZeros=FALSE;

    /* save */
    parentboundary = b->boundary;

    boundary = getMimeHeaderFieldVal(ct,"boundary");
    /* getMimeHeaderFieldVal returns NULL when the field is absent;
     * this used to fall straight into strlen(NULL). */
    if (boundary == NULL)
        errAbort("error: boundary= value missing in MIME header Content-type:%s",ct);
    if (strlen(boundary) >= MAXBOUNDARY)
        errAbort("error: boundary= value too long in MIME header Content-type:%s",ct);
    safef(bound, sizeof(bound), "--%s",boundary);  /* do not prepend CRLF to boundary yet */
    freez(&boundary);
    boundary = cloneString(bound);

    if (!autoBoundary)
        {
        /* skip any extra "prolog" before the initial boundary marker */
        char *bnd = NULL;
        while (TRUE)
            {
            bnd = getLineMB(b);
            if (sameString(bnd,boundary))
                break;
            freez(&bnd);
            }
        freez(&bnd);
        }

    /* include crlf in the boundary so bodies won't have trailing a CRLF
     * this is done here so that in case there's no extra CRLF
     * between the header and the boundary, it will still work,
     * so we only prepend the CRLF to the boundary after initial found */
    safef(bound,sizeof(bound),"%s%s", getNewLineByType(), boundary);
    freez(&boundary);
    boundary=cloneString(bound);
    boundaryLen = strlen(boundary);

    setBoundaryMB(b, boundary);

    while (TRUE)
        {
        int i = 0;
        char c1 = ' ', c2 = ' ';
        child = parseMultiParts(b,NULL);
        slAddHead(&p->multi,child);

        /* NOTE(review): everything from the header of the for-loop below to
         * the slReverse call was lost in the text this was restored from
         * (it read "for (i=0;imulti);").  What follows is a reconstruction
         * from context: step over the boundary, then "--" right after it
         * marks the closing boundary.  Confirm against the upstream file. */
        for (i = 0; i < boundaryLen; ++i)
            getcMB(b);
        c1 = getcMB(b);
        c2 = getcMB(b);
        if ((c1 == '-') && (c2 == '-'))
            break;  /* last boundary found */
        /* not the last part: give back the two chars so the next part's
         * header reader sees the rest of the boundary line */
        putBackMB(b);
        putBackMB(b);
        }
    slReverse(&p->multi);   /* children were added at the head; restore input order */

    /* restore */
    freez(&boundary);
    boundary = parentboundary;
    setBoundaryMB(b, boundary);

    /* dump any "epilog" that may be between the
     * end of the child boundary and the parent boundary */
    getChunkMB(b, &bp, &size, &hasZeros);
    }
else
    {
    char *bp=NULL;
    int size=0;
    boolean hasZeros=FALSE;
    boolean asFile=FALSE;
    boolean convert=FALSE;
    FILE *f = NULL;
    struct dyString *dy=newDyString(1024);

    while (TRUE)
        {
        // break if eop, eod, eoi
        getChunkMB(b, &bp, &size, &hasZeros);
        if (hasZeros)
            {
            p->binary=TRUE;
            }
        if (!asFile && p->size+size > MAXPARTSIZE)
            {
            convert=TRUE;   /* too big for memory: switch to a tempfile */
            }
        if (convert)
            {
            struct tempName uploadedData;
            convert=FALSE;
            asFile = TRUE;
            makeTempName(&uploadedData, "hgSs", ".cgi");
            p->fileName=cloneString(uploadedData.forCgi);
            f = mustOpen(p->fileName,"w");
            /* flush what was gathered in memory so far, then drop it */
            mustWrite(f,dy->string,dy->stringSize);
            freeDyString(&dy);
            }
        if (asFile)
            {
            mustWrite(f,bp,size);
            }
        else
            {
            dyStringAppendN(dy,bp,size);
            }
        p->size+=size;
        if (p->size > MAXDATASIZE)
            errAbort("max data size allowable for upload in MIME exceeded %llu",(unsigned long long)MAXDATASIZE);

        if (b->eop && b->i == b->eop)  /* end of part */
            {
            break;
            }
        if (b->i == b->eoi && b->eoi < b->eom) /* end of data */
            {
            break;
            }
        moreMimeBuf(b);
        }
    if (dy)   /* still in memory: hand over a zero-terminated copy */
        {
        p->data=needLargeMem(dy->stringSize+1);
        memcpy(p->data,dy->string,dy->stringSize);
        p->data[dy->stringSize] = 0;
        freeDyString(&dy);
        }
    if (f)
        carefulClose(&f);
    }

return p;
}