// A little C parser.  Fakes a few things (skips # directives.)
// This will find and complain about missing breaks in a case in a
// switch.

include 'cToken'

// cParser walks the token stream of one C file.  It builds no syntax
// tree; each method just consumes the tokens of the construct it is named
// after, and reports problems through punt().
class cParser
{
    cTkz tkz;

    to tokType() into short type
    // Type of the current token.
    {
        type = tkz.cur.type;
    }

    to nextType() into short type
    // Type of the token after the current one.
    {
        type = tkz.next.type;
    }

    to tokVal() into string s
    // Text of the current token.
    {
        s = tkz.cur.val;
    }

    to nextVal() into string s
    // Text of the token after the current one.
    {
        s = tkz.next.val;
    }

    to advance()
    // Move to the next token.
    {
        tkz.advance();
    }

    to mustAdvance()
    // Move to the next token, complaining if that lands on end of file.
    {
        advance();
        if (tokType() == ctEof)
            errAt("Unexpected end of file");
    }

    to init(string fileName)
    // Open a tokenizer on fileName and load the first token.
    {
        tkz = (fileName);
        advance();
    }

    to errAt(string message)
    // Punt with message, prefixed by the current and next token text and
    // the line and file of the current token.
    {
        int line = tkz.cur.line;
        string file = tkz.cur.fileName;
        string tok = tkz.cur.val;
        string nextTok = tkz.next.val;
        punt("error at $tok $nextTok, line $line of $file: $message");
    }

    to expectingGot(string expecting)
    // Complain that we wanted 'expecting' but found the current token.
    {
        string got = tokVal();
        errAt("Expecting $expecting got $got");
    }

    to eatVal(string val)
    // Require the current token text to be val, then skip it.
    {
        if (tokVal() != val)
            expectingGot(val);
        advance();
    }

    to eatName() into string s
    // Require a name token, return its text and skip it.
    {
        if (tokType() != ctName)
            expectingGot("name");
        s = tokVal();
        advance();
    }

    to fakePreprocessor()
    // Skip a # directive: everything up to the first token that sits on a
    // different line.  A ctEscape token moves the line being skipped to
    // wherever the token after it is (presumably a backslash line
    // continuation - confirm against cToken).
    {
        int lineIx = tkz.cur.line;
        advance();
        for (;;)
        {
            // Defensive stop: never spin at end of file waiting for the
            // line number to change.
            if (tokType() == ctEof)
                break;
            if (tokType() == ctEscape)
            {
                advance();
                lineIx = tkz.cur.line;
            }
            else if (tkz.cur.line != lineIx)
                break;
            advance();
        }
    }

    to storageClass()
    // Skip one storage class keyword if present.
    {
        string val = tokVal();
        case (val)
        {
            "extern", "static", "auto", "register" : advance();
        }
    }

    to structOrEnum() into string name
    // Handle a leading 'struct' or 'enum' keyword.  The name is returned
    // only for a struct definition (struct name { ... }).  For a plain
    // 'struct name' or 'enum name' just the keyword is skipped and name is
    // left unset, so the caller picks the tag up as the type name.
    {
        string val = tokVal();
        case (val)
        {
            "struct" :
            {
                advance();      // over struct
                if (nextType() == ctOpenCurly)
                {
                    name = tokVal();
                    advance();  // Over name;
                    compoundStatement();
                }
            }
            "enum" :
            {
                advance();
                if (nextType() == ctOpenCurly)
                {
                    punt("Don't know how to define enums yet.");
                }
            }
        }
    }

    to eatOptional(string val)
    // Skip to next token if it's the same as val
    {
        if (tokVal() == val)
            advance();
    }

    to constQualifier()
    // Skip an optional 'const'.
    {
        eatOptional('const');
    }

    to atom()
    // Parse the innermost piece of an expression: a literal or name, a
    // parenthesized expression, a { ... } initializer, or a run of adjacent
    // string literals.  Any other token is left alone, which is what lets
    // callers cope with empty operands.
    {
        case (tokType())
        {
            ctChar, ctName, ctInt, ctLong, ctFloat: advance();
            ctSharp:
            {
                fakePreprocessor();
                expression();
            }
            ctOpenParen:
            {
                advance();
                expression();
                eatVal(')');
            }
            ctOpenCurly:
            {
                advance();
                expression();
                eatOptional(',');
                eatVal('}');
            }
            ctString:
            {
                while (tokType() == ctString)
                    advance();
            }
        }
    }

    to callIndexField()
    // Parse an atom followed by any number of call (...), index [...] and
    // field (. or ->) suffixes.
    {
        atom();
        for (;;)
        {
            case (tokType())
            {
                ctOpenParen:
                {
                    advance();
                    expression();
                    eatVal(')');
                }
                ctOpenSquare:
                {
                    advance();
                    expression();
                    eatVal(']');
                }
                ctDot,ctPointer:
                {
                    advance();
                    if (tokType() != ctName)
                        expectingGot("name");
                    advance();
                }
                else: break;
            }
        }
    }

    to unary()
    // Parse prefix operators, casts and sizeof, then the operand with its
    // suffixes, then an optional postfix ++ or --.  A cast is recognized
    // only when the token after '(' is one of the built-in type keywords
    // listed below.
    {
        case (tokType())
        {
            ctBang, ctTilde, ctPlusPlus,ctMinusMinus,ctPlus,ctMinus,ctAsterisk:
            {
                advance();
                unary();
            }
            ctOpenParen:  // Deal with cast, argh!
            {
                bit isCast = false;
                if (nextType() == ctName)
                {
                    case (nextVal())
                    {
                        'struct', 'const', 'int', 'char', 'short', 'long',
                        'unsigned', 'float', 'double', 'enum' : isCast = true;
                    }
                }
                if (isCast)
                {
                    int level = 0;
                    for (;;)    /* Skip to matching paren */
                    {
                        case (tokType())
                        {
                            ctOpenParen: ++level;
                            ctCloseParen:
                            {
                                --level;
                                if (level == 0)
                                    break;
                            }
                        }
                        mustAdvance();
                    }
                    advance();  // skip close paren
                    // The cast operand is itself a unary expression, so
                    // (int)!x, (int)~x and (int)(long)x all need another
                    // pass here, same as the sizeof case below.
                    unary();
                }
            }
            ctName:
            {
                if (tokVal() == 'sizeof')
                {
                    advance();
                    unary();
                }
            }
        }
        callIndexField();
        case (tokType())
        {
            ctPlusPlus, ctMinusMinus: advance();
        }
    }

    to product()
    // unary { (* | / | %) unary }
    {
        unary();
        for (;;)
        {
            case (tokType())
            {
                ctAsterisk, ctDiv, ctPercent:
                {
                    advance();
                    unary();
                }
                else: break;
            }
        }
    }

    to sum()
    // product { (+ | -) product }
    {
        product();
        for (;;)
        {
            case (tokType())
            {
                ctPlus, ctMinus:
                {
                    advance();
                    product();
                }
                else: break;
            }
        }
    }

    to shift()
    // sum { (<< | >>) sum }
    {
        sum();
        for (;;)
        {
            case (tokType())
            {
                ctShiftLeft, ctShiftRight:
                {
                    advance();
                    sum();
                }
                else: break;
            }
        }
    }

    to cmp()
    // shift { (< | <= | > | >=) shift }
    {
        shift();
        for (;;)
        {
            case (tokType())
            {
                ctLess, ctLessEqual, ctGreater, ctGreaterEqual:
                {
                    advance();
                    shift();
                }
                else: break;
            }
        }
    }

    to eqNe()
    // cmp { (== | !=) cmp }
    {
        cmp();
        for (;;)
        {
            case (tokType())
            {
                ctEqualEqual,ctNotEqual:
                {
                    advance();
                    cmp();
                }
                else: break;
            }
        }
    }

    to bitAnd()
    // eqNe { & eqNe }
    {
        eqNe();
        while (tokType() == ctAnd)
        {
            advance();
            eqNe();
        }
    }

    to xor()
    // bitAnd { ^ bitAnd }
    {
        bitAnd();
        while (tokType() == ctCaret)
        {
            advance();
            bitAnd();
        }
    }

    to bitOr()
    // xor { | xor }
    {
        xor();
        while (tokType() == ctOr)
        {
            advance();
            xor();
        }
    }

    to logAnd()
    // bitOr { && bitOr }
    {
        bitOr();
        while (tokType() == ctAndAnd)
        {
            advance();
            bitOr();
        }
    }

    to logOr()
    // logAnd { || logAnd }
    {
        logAnd();
        while (tokType() == ctOrOr)
        {
            advance();
            logAnd();
        }
    }

    to trinary()
    // logOr [ ? expression : trinary ]
    // The middle operand is a full expression and the last one may itself
    // be a conditional, so  a ? b : c ? d : e  parses.
    {
        logOr();
        if (tokType() == ctQuestion)
        {
            advance();
            expression();
            eatVal(':');
            trinary();
        }
    }

    to assign()
    // trinary { assignment-operator trinary }
    {
        trinary();
        for (;;)
        {
            case (tokType())
            {
                ctEqual, ctPlusEqual, ctMinusEqual, ctTimesEqual, ctDivEqual,
                ctModEqual, ctAndEqual, ctOrEqual, ctXorEqual,
                ctLeftShiftEqual, ctRightShiftEqual :
                {
                    advance();
                    trinary();
                }
                else: break;
            }
        }
    }

    to tuple()
    // assign { , assign }
    {
        assign();
        while (tokType() == ctComma)
        {
            advance();
            assign();
        }
    }

    to expression()
    // Parse a complete expression, commas included.
    {
        tuple();
    }

    to nameAndTypeDecorations() into string name
    // Parse a declarator: the name with any leading asterisks, grouping
    // parentheses and trailing [size] suffixes.  name stays unset when
    // there is no name token (as in a prototype parameter).
    {
        case (tokType())
        {
            ctName:
            {
                name = tokVal();
                advance();
            }
            ctOpenParen:
            {
                advance();
                name = nameAndTypeDecorations();
                eatVal(')');
            }
            ctAsterisk:
            {
                advance();
                name = nameAndTypeDecorations();
            }
        }
        while (tokType() == ctOpenSquare)
        {
            advance();
            expression();
            eatVal(']');
        }
    }

    to ifStatement()
    // if ( expression ) statement [ else statement ]
    {
        advance();      // over if
        eatVal('(');
        expression();
        eatVal(')');
        statement();
        if (tokVal() == "else")
        {
            advance();
            statement();
        }
    }

    to forStatement()
    // for ( [expression] ; [expression] ; [expression] ) statement
    {
        advance();      // over for
        eatVal('(');
        if (tokType() != ctSemi)
            expression();
        eatVal(';');
        if (tokType() != ctSemi)
            expression();
        eatVal(';');
        if (tokType() != ctCloseParen)
            expression();
        eatVal(')');
        statement();
    }

    to whileStatement()
    // while ( expression ) statement
    {
        advance();      // over while
        eatVal('(');
        expression();
        eatVal(')');
        statement();
    }

    to checkForBreak()
    // Advance through statements until we get a 'case', 'default' or '}'
    // Make sure that there is a break somewhere in there.
    // Braces are tracked here rather than handed to statement() so that a
    // break or return sitting directly inside a { } block still counts.
    // A run with no statements at all (stacked case labels) is not an error.
    {
        int level = 1;
        bit gotBreak = false, anyLines = false;
        int line = tkz.cur.line;
        string file = tkz.cur.fileName;
        for (;;)
        {
            case (tokVal())
            {
                '{' :
                {
                    ++level;
                    advance();
                }
                '}' :
                {
                    --level;
                    if (level == 0)
                        break;      // leave the switch's own '}' for the caller
                    advance();
                }
                'case', 'default': break;
                'break', 'return':
                {
                    gotBreak = true;
                    statement();
                }
                else :
                {
                    statement();
                    anyLines = true;
                }
            }
        }
        if (anyLines && !gotBreak)
            punt("Missing break line $line of $file");
    }

    to switchStatement()
    // switch ( expression ) { case/default sections }
    // Each section is run through checkForBreak().
    {
        advance();
        eatVal('(');
        expression();
        eatVal(')');
        eatVal('{');
        for (;;)
        {
            case (tokVal())
            {
                '}' :
                {
                    advance();
                    break;
                }
                'case' :
                {
                    advance();      // over case
                    // The label is a constant expression, not always one
                    // token: think  case -1:  or  case A|B:
                    trinary();
                    eatVal(':');
                    checkForBreak();
                }
                'default' :
                {
                    advance();      // over default
                    eatVal(':');
                    checkForBreak();
                }
                else : expectingGot("case or default in switch");
            }
        }
    }

    to returnStatement()
    // return [ expression ] ;
    {
        advance();      // over return
        if (tokType() != ctSemi)
            expression();
        eatVal(';');
    }

    to statement()
    // Parse one statement.  A name that is not a keyword starts a
    // declaration when the token after it is a name or an asterisk, and an
    // expression statement otherwise.
    {
        case (tokType())
        {
            ctPlusPlus, ctMinusMinus, ctAsterisk:
            {
                expression();
                eatVal(';');
            }
            ctSharp : fakePreprocessor();
            ctOpenCurly: compoundStatement();
            ctSemi: advance();
            ctName:
            {
                case (tokVal())
                {
                    "if": ifStatement();
                    "switch": switchStatement();
                    "for": forStatement();
                    "while" : whileStatement();
                    "return" : returnStatement();
                    "continue", "break":
                    {
                        advance();
                        eatVal(';');
                    }
                    else:
                    {
                        case (nextType())
                        {
                            ctAsterisk, ctName: declaration(false, true);
                            else:
                            {
                                expression();
                                eatVal(';');
                            }
                        }
                    }
                }
            }
            else: expectingGot('statement');
        }
    }

    to compoundStatement()
    // parse a compound statement.
    {
        advance();      // skip '{'
        for (;;)
        {
            if (tokType() == ctCloseCurly)
            {
                advance();
                break;
            }
            else
                statement();
        }
    }

    to functionParams()
    // Sitting on the opening paren.  Parse until next close paren.
    {
        advance();      // skip '('
        for (;;)
        {
            case (tokType())
            {
                ctDotDotDot :
                {
                    advance();
                    break;
                }
                ctCloseParen : break;
                else :
                {
                    typedDeclaration(false);
                    if (tokType() == ctComma)
                        advance();
                    else
                        break;
                }
            }
        }
        eatVal(')');
    }

    to unsignedAndLong() into string s
    // Absorb a run of built-in type keywords so that multi-word types such
    // as  unsigned char,  short int  and  long double  are taken as the
    // type rather than leaving the second word to be read as the variable
    // name.  s is the last keyword seen, and is unset if there were none.
    {
        for (;;)
        {
            case (tokVal())
            {
                'int', 'long', 'unsigned', 'signed', 'short', 'char', 'double':
                {
                    s = tokVal();
                    advance();
                }
                else: break;
            }
        }
    }

    to typedDeclaration(bit bodyOk) into bit isFunc
    // Get a typed declaration
    // The type is a struct definition, else a run of built-in keywords,
    // else a single name.  isFunc is passed up from untypedDeclaration().
    {
        storageClass();
        constQualifier();
        string type = structOrEnum();
        if (!type)
        {
            type = unsignedAndLong();
            if (!type)
                type = eatName();
        }
        isFunc = untypedDeclaration(bodyOk);
    }

    to untypedDeclaration(bit bodyOk) into bit isFunc
    // Parse one declarator plus an optional initializer or parameter list.
    // isFunc is set only when a function body was parsed; a body where
    // bodyOk is false is an error.
    {
        constQualifier();
        string name = nameAndTypeDecorations();
        case (tokType())
        {
            ctEqual:
            {
                advance();
                expression();
            }
            ctOpenParen:
            {
                functionParams();
                if (tokType () == ctOpenCurly)
                {
                    if (!bodyOk)
                        errAt("No function body allowed in this context");
                    compoundStatement();
                    isFunc = true;
                }
            }
        }
    }

    to declaration(bit bodyOk, bit needSemi)
    // Parse a declaration with any further comma separated declarators.
    // A function definition ends at its body, so neither the declarator
    // list nor the closing ';' is looked for after one.
    {
        bit isFunc = typedDeclaration(bodyOk);
        if (!isFunc)
        {
            for (;;)
            {
                if (tokType() == ctComma)
                    advance();
                else
                    break;
                untypedDeclaration(bodyOk);
            }
            if (needSemi)
                eatVal(';');
        }
    }

    to parse()
    // Parse the whole file: top level declarations and # directives until
    // end of file.
    {
        for (;;)
        {
            short type = tokType();
            case (type)
            {
                ctSharp : fakePreprocessor();
                ctName : declaration(true, true);
                // A stray ';' has to be consumed or this loop never moves on.
                ctSemi : advance();
                ctEof: break;
                else: expectingGot("declaration");
            }
        }
    }
}

// Driver: check every file named on the command line.  A problem in one
// file is printed and the next file is still checked.
if (args.size == 0)
{
    punt(" cValidate - look for missing breaks in C code. usage: cValidate file(s).c ");
}

for (arg in args)
{
    try
    {
        cParser parser = (arg);
        parser.parse();
    }
    catch (m)
    {
        print(m);
    }
}