#!/usr/bin/env python
"""Extract delimited documents from a text stream.

The stream is read in fixed-size chunks.  Every span that starts with a
match of ``HEAD_RE`` and ends with the next match of ``TAIL_RE`` is yielded
by :func:`scanner`.  Run as a script, the module scans standard input and
prints each document followed by an empty line.
"""
import re
import sys

# NOTE(review): every literal in HEAD and TAIL is empty in this copy of the
# file.  Presumably the original delimiter text (markup of some kind) was
# stripped when the file was extracted -- restore it before relying on the
# module-level defaults.  With empty literals both regexes match the empty
# string, which scanner() rejects with ValueError.
HEAD = [
    '',
    '',
    '',
]
TAIL = [
    '',
]

# Number of trailing characters carried over between reads while searching
# for a header: the combined length of the HEAD fragments plus an allowance
# of four characters per fragment (presumably for the whitespace that the
# ``\s*`` joins below may match -- TODO confirm the allowance is sufficient).
HEAD_LEN = sum(map(len, HEAD)) + len(HEAD) * 4

# The fragments are matched literally, in order, with optional whitespace
# between consecutive fragments.
HEAD_RE = re.compile(r'\s*'.join(map(re.escape, HEAD)))
TAIL_RE = re.compile(r'\s*'.join(map(re.escape, TAIL)))


def find_block(fh, lastblock, regex, skip=0, BLOCKSIZE=1024):
    """Read from *fh* until *regex* matches the buffered text.

    Args:
        fh: Object with a ``read(size)`` method returning text.
        lastblock: Text already buffered; it is searched before any read.
        regex: Compiled pattern to search for.
        skip: When non-zero, only the last *skip* characters of the buffer
            are kept each time a new chunk is appended.  When zero, the
            whole buffer is kept, so the text accumulates.
        BLOCKSIZE: Number of characters requested per ``read`` call.

    Returns:
        A ``(buffer, match)`` tuple, where *match* is the match object for
        *regex* found in *buffer*.

    Raises:
        StopIteration: If *fh* is exhausted before *regex* matches.
    """
    search = regex.search
    match = search(lastblock)
    while match is None:
        block = fh.read(BLOCKSIZE)
        if not block:
            raise StopIteration
        # Spelled out instead of relying on ``lastblock[-0:]`` happening to
        # return the whole string when skip is 0.
        carried = lastblock[-skip:] if skip else lastblock
        lastblock = carried + block
        match = search(lastblock)
    return lastblock, match


def scanner(fh, head_re=HEAD_RE, tail_re=TAIL_RE, head_len=HEAD_LEN):
    """Yield every document found in *fh*.

    A document is the text matched by *head_re* followed by everything up
    to and including the next match of *tail_re*.  Text between documents
    is discarded, and an unterminated document at end of input is dropped.

    Args:
        fh: Object with a ``read(size)`` method returning text.
        head_re: Compiled pattern marking the start of a document.
        tail_re: Compiled pattern marking the end of a document.
        head_len: Number of trailing characters kept between reads while
            looking for a header.  It should be at least the length of the
            longest text *head_re* can match, otherwise a header split
            across two reads can be missed.

    Yields:
        Each document as a single string.

    Raises:
        ValueError: On first iteration, if either pattern can match the
            empty string; such a pattern would match without consuming
            input and the generator would never terminate.
    """
    for label, regex in (('head_re', head_re), ('tail_re', tail_re)):
        if regex.search('') is not None:
            raise ValueError(
                '%s matches the empty string; delimiters must be non-empty'
                % (label,)
            )

    lastblock = ''
    while True:
        try:
            lastblock, match = find_block(
                fh, lastblock, head_re, skip=head_len)
            head = match.group()
            lastblock = lastblock[match.end():]
            # skip=0: keep the whole body while looking for the tail.
            lastblock, match = find_block(fh, lastblock, tail_re)
        except StopIteration:
            # End of input.  Returning here (rather than letting the
            # StopIteration propagate) is required by PEP 479: since
            # Python 3.7 a StopIteration escaping a generator body is
            # converted to RuntimeError.
            return
        yield head + lastblock[:match.end()]
        lastblock = lastblock[match.end():]


def main():
    """Print every document found on standard input."""
    for plist in scanner(sys.stdin):
        print(plist)
        # NOTE(review): the original printed ``'' % (i,)`` here, with ``i``
        # the zero-based document index.  An empty template cannot consume
        # an argument, so that expression raises TypeError.  The template
        # text appears to have been lost; an empty line is printed until
        # the intended per-document trailer is restored.
        print('')


if __name__ == '__main__':
    main()