// Minimal XML tag scanner.  Reads the file named by the first command
// line argument, breaks it into tags (name plus var="value" attributes),
// records the text that follows each tag, and prints every tag found.

// One tag as located by parseNextTag.
class tag
    {
    string name;	// Just the name, no attributes
    dir of string attributes;	// All the var=value pairs
    int startPos;	// Position just past the opening '<' within XML
    int endPos;	// Position of the closing '>' within XML
    string text;	// Trimmed text between this tag and the next
    }

// Split the text found between '<' and '>' into the tag name and a dir
// of attribute values keyed by attribute name.  The name is everything up
// to the first space (or the whole string when there is no space).
// Attributes must be of the form var="value".  Punts when an '=' is
// missing, when no opening quote follows the '=', or when the closing
// quote is missing.
flow parseTagContents(string contents) into (string name, dir of string attributes)
    {
    // Parse out name, and set pos to the end of the name
    int pos = contents.leadingSpaces();	// Space after < is ok
    int endName = contents.findNext(' ', pos);
    if (endName < 0)	// All just one word
        endName = contents.size;
    name = contents.middle(pos, endName-pos);
    pos = endName;

    // Create attributes dir, and loop to fill it up.
    attributes = ();
    for (;;)
        {
        // Skip over leading white space and see if we're done.
        pos += contents.leadingSpaces(pos);
        if (pos >= contents.size)
            break;

        // Find the equals and save away the word before it.
        int equalsPos = contents.findNext('=', pos);
        if (equalsPos < 0)
            punt("Missing = in $contents");
        string var = contents.middle(pos, equalsPos-pos);
        pos = equalsPos+1;

        // Find the opening quote of the value.
        pos = contents.findNext('"', pos);
        if (pos < 0)
            punt("Missing quote after = in $contents");
        pos += 1;	// Skip over "

        // Find the closing quote.  The check has to be made on endQuote:
        // pos was just advanced past the opening quote and so can never
        // be negative here.
        int endQuote = contents.findNext('"', pos);
        if (endQuote < 0)
            punt("Missing closing quote in $contents");
        string val = contents.middle(pos, endQuote-pos);
        attributes[var] = val;
        pos = endQuote+1;	// Skip over end quote
        }
    }

// Find the next tag in xml at or after pos.  When no '<' remains this
// returns without assigning either output, which is how parseAllTags
// detects the end of input.  Otherwise tag is filled in (except for its
// text field) and nextPos is the position just past the closing '>'.
// Punts when a '<' has no matching '>'.
flow parseNextTag(string xml, int pos) into (tag tag, int nextPos)
    {
    int startPos = xml.findNext('<', pos);
    if (startPos < 0)
        return;
    startPos += 1;	// skip over '<'
    int endPos = xml.findNext('>', startPos);
    if (endPos < 0)
        punt("Unmatched '<'");
    tag = ();
    tag.startPos = startPos;
    tag.endPos = endPos;
    string contents = xml.middle(startPos, endPos-startPos);
    (tag.name, tag.attributes) = parseTagContents(contents);
    nextPos = endPos+1;	// skip over '>'
    }

// Parse every tag in xml, in order of appearance.  Each tag except the
// last also gets its text field set to the trimmed text lying between it
// and the tag that follows.
flow parseAllTags(string xml) into (array of tag tags)
    {
    tags = ();
    int pos = 0, tag lastTag, tag;
    for (;;)
        {
        (tag, pos) = parseNextTag(xml, pos);
        if (!tag)
            break;
        tags.append(tag);
        if (lastTag)
            {
            // Get the text between tags.  tag.startPos is one past the
            // '<' and lastTag.endPos is on the '>', so the -2 avoids
            // both the > and the <.
            int textSize = tag.startPos - lastTag.endPos - 2;
            string text = xml.middle(lastTag.endPos+1, textSize);
            lastTag.text = text.trim();
            }
        lastTag = tag;
        }
    }

// Main script: parse the file named on the command line and print each tag.
string xml = fileReadAll(args[0]);
array of tag tags = parseAllTags(xml);
for (tag in tags)
    print(tag);