#!/usr/bin/env python
"""Extract XML property lists embedded in an arbitrary text stream.

The stream is read in fixed-size chunks.  Every span that starts with the
standard plist prologue (XML declaration, DOCTYPE and opening ``<plist>``
tag) and ends with ``</plist>`` is yielded as one string; anything
between such spans is discarded.

When run as a script, the plists found on standard input are written to
standard output, each followed by an ``<!-- END PLIST n -->`` marker.
"""
import re
import sys

# The pieces that must appear, in this order, at the start of a plist.
HEAD = [
    '<?xml version="1.0" encoding="UTF-8"?>',
    '<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" '
    '"http://www.apple.com/DTDs/PropertyList-1.0.dtd">',
    '<plist version="1.0">',
]
# The piece that closes a plist.
TAIL = [
    '</plist>',
]

# Number of trailing characters carried over between reads while hunting
# for the prologue: the length of every HEAD piece plus four characters of
# slack per piece for the whitespace that may separate them.  This lets a
# prologue that straddles a read boundary still be matched.
HEAD_LEN = sum(map(len, HEAD)) + len(HEAD) * 4

# The pieces may be separated by any amount of whitespace (including none).
HEAD_RE = re.compile(r'\s*'.join(map(re.escape, HEAD)))
TAIL_RE = re.compile(r'\s*'.join(map(re.escape, TAIL)))


def find_block(fh, lastblock, regex, skip=0, BLOCKSIZE=1024):
    """Read from *fh* until *regex* matches the buffered text.

    Parameters:
        fh: object with a ``read(size)`` method returning text.
        lastblock: text already read but not yet consumed; it is searched
            first, before anything is read from *fh*.
        regex: compiled pattern to search for.
        skip: how many trailing characters of the buffer to keep each
            time another chunk is read.  ``0`` (the default) keeps the
            whole buffer, so the text accumulates until a match is found.
        BLOCKSIZE: number of characters requested per read.

    Returns:
        A ``(buffer, match)`` tuple; the positions in *match* are offsets
        into *buffer*.

    Raises:
        StopIteration: *fh* was exhausted before *regex* matched.
    """
    match = regex.search(lastblock)
    while match is None:
        block = fh.read(BLOCKSIZE)
        if not block:
            raise StopIteration
        # ``lastblock[-0:]`` would be the whole string, so a skip of zero
        # means "keep everything"; spell that out instead of relying on
        # the slicing quirk.
        carried = lastblock[-skip:] if skip else lastblock
        lastblock = carried + block
        match = regex.search(lastblock)
    return lastblock, match


def scanner(fh):
    """Yield each complete plist document found in the text stream *fh*.

    Each yielded string runs from the start of the plist prologue through
    the closing ``</plist>`` tag.  Iteration ends when *fh* is exhausted;
    a plist whose closing tag never arrives is dropped.
    """
    lastblock = ''
    while True:
        try:
            # Only a short tail of the buffer is kept while looking for
            # the prologue, since text before a plist is not wanted.
            lastblock, match = find_block(
                fh, lastblock, HEAD_RE, skip=HEAD_LEN)
            head = match.group(0)
            lastblock = lastblock[match.end():]
            # From here everything is kept: it is the body of the plist.
            lastblock, match = find_block(fh, lastblock, TAIL_RE)
        except StopIteration:
            # End of input.  Finish the generator explicitly: letting
            # StopIteration escape a generator body is turned into
            # RuntimeError on Python 3.7+ (PEP 479).
            return
        yield head + lastblock[:match.end()]
        lastblock = lastblock[match.end():]


if __name__ == '__main__':
    for i, plist in enumerate(scanner(sys.stdin)):
        sys.stdout.write(plist + '\n')
        sys.stdout.write('<!-- END PLIST %d -->\n' % (i,))