// Minimal XML reader: scans a string for <...> tags, links them into a
// tree by matching each tag with its "/name" closing tag, and prints
// the tree with indentation.

// One parsed tag.  Closing tags are also stored as tag objects (with a
// name that starts with '/') until linkChildTags consumes them.
class tag
    {
    string name;    // Just the name, no attributes
    dir of string attributes;  // All the var=value pairs
    int startPos;   // Start position within XML (just past the '<')
    int endPos;     // End position within XML (position of the '>')
    string text;    // Text between this tag and the next
    array of tag children;  // Any child tags
    }

flow parseTagContents(string contents)
into (string name, dir of string attributes)
// Split the text found between '<' and '>' into the tag name and a
// dir of its var="value" attributes.  Punts if an attribute lacks its
// '=', its opening quote, or its closing quote.
{
// Parse out name, and set pos to the end of the name
int pos = contents.leadingSpaces();  // Space after < is ok
int endName = contents.findNext(' ', pos);
if (endName < 0)  // All just one word
    endName = contents.size;
name = contents.middle(pos, endName-pos);
pos = endName;

// Create attributes dir, and loop to fill it up.
attributes = ();
for (;;)
    {
    // Skip over leading white space and see if we're done.
    pos += contents.leadingSpaces(pos);
    if (pos >= contents.size)
        break;

    // Find the equals and save away the word before it.
    int equalsPos = contents.findNext('=', pos);
    if (equalsPos < 0)
        punt("Missing = in $contents");
    string var = contents.middle(pos, equalsPos-pos);
    pos = equalsPos+1;

    // Find the quoted string and save it
    pos = contents.findNext('"', pos);
    if (pos<0)
        punt("Missing quote after = in $contents");
    pos += 1;   // Skip over "
    int endQuote = contents.findNext('"', pos);
    // Test endQuote here, not pos: pos was already validated above, so
    // checking it again would let an unterminated value slip through.
    if (endQuote<0)
        punt("Missing closing quote in $contents");
    string val = contents.middle(pos, endQuote-pos);
    attributes[var] = val;
    pos = endQuote+1;   // Skip over end quote
    }
}

flow parseNextTag(string xml, int pos) into (tag tag, int nextPos)
// Find the next <...> in xml at or after pos and parse it.  When no
// '<' remains this returns without setting tag, which is how callers
// detect the end of input.  Punts on a '<' that has no matching '>'.
{
int startPos = xml.findNext('<', pos);
if (startPos < 0)
    return;
startPos += 1;  // skip over '<'
int endPos = xml.findNext('>', startPos);
if (endPos < 0)
    punt("Unmatched '<'");
tag = ();
tag.startPos = startPos;
tag.endPos = endPos;
string contents = xml.middle(startPos, endPos-startPos);
(tag.name, tag.attributes) = parseTagContents(contents);
nextPos = endPos+1;  // skip over '>'
}

flow parseAllTags(string xml) into (array of tag tags)
// Parse all tags into an array that includes closing tags.  Each tag
// except the last also gets its text field set to the trimmed text
// lying between it and the tag that follows.
{
tags = ();
int pos = 0, tag lastTag, tag;
for (;;)
    {
    (tag, pos) = parseNextTag(xml, pos);
    if (!tag)
        break;
    tags.append(tag);
    if (lastTag)
        {
        // Get the text between tags.  startPos is one past the '<' and
        // endPos sits on the '>', so the -2 avoids > and <
        int textSize = tag.startPos - lastTag.endPos - 2;
        string text = xml.middle(lastTag.endPos+1, textSize);
        lastTag.text = text.trim();
        }
    lastTag = tag;
    }
}

to linkChildTags(array of tag tags, int pos) into (int nextPos)
// Add all tags between this one and its closing tag as children.
// Returns in nextPos the index just past the closing tag.  Punts if
// the array ends before the closing tag is found.
// NOTE(review): self-closing tags such as <br/> are not special-cased;
// their name keeps the trailing '/', no closing tag ever matches, and
// the parse ends in an "Unclosed" punt.
{
// Save current pos as root tag.  It'll truly be the root
// of the xml tree the first call, but in later calls it'll
// just be the root of a local subtree.
tag rootTag = tags[pos];
pos++;

// Figure out closing tag name, and set up child array.
string closingName = '/' + rootTag.name;
rootTag.children = ();

// Loop until find closing tag or fall off array (which would
// be an error).
for (;;)
    {
    if (pos >= tags.size)
        punt("Unclosed " + rootTag.name + " tag");
    tag tag = tags[pos];
    if (tag.name == closingName)
        {
        nextPos = pos+1;    // Skip over closing tag.
        break;
        }
    // Save next tag in children array, then call self to
    // fill in *grandchildren*.  We will be courteous enough
    // ourselves to leave pos past the grandchildren.
    rootTag.children.append(tag);
    pos = linkChildTags(tags, pos);
    }
}

to parseXml(string xml) into (tag rootTag)
// Parse XML and return the root tag, with all descendants linked in
// through the children arrays.  Punts if xml contains no tags at all.
{
array of tag tags = parseAllTags(xml);
// Without this guard an input with no tags would index into an empty
// array below instead of producing a readable error.
if (tags.size == 0)
    punt("No tags found in XML");
linkChildTags(tags, 0);
rootTag = tags[0];
}

to printTags(tag tag, int level)
// Print out tags, with children progressively more indented.
{
// Print indentation.
for (i in 0 to level)
    prin(' ');

// Print information in this tag.
prin(tag.name);
prin(tag.attributes);
string text = tag.text;
print(" '$text'");

// Call self to print children.
for (child in tag.children)
    printTags(child, level+1);
}

// Main program: read the file named by the first argument, parse it,
// and print the resulting tag tree.
string xml = fileReadAll(args[0]);
tag rootTag = parseXml(xml);
printTags(rootTag, 0);