#!/usr/local/bin/python import sys import os import getopt import re import string import StringIO import types import socket def usage(msg, exitCode=1): if msg: sys.stderr.write('\nerror: %s\n\n' % msg) sys.stderr.write('''usage: httpflow.py Reads tcpflow output from stdin, parses and filters HTTP conversations, writes summary of header information to stdout. --help: print this usage information and exit --include : Report only headers that match regular expression . Can appear multiple times on command line. Each expression is appended together with an or. I.e. '||.' --exclude : Do not report headers that match regular expression [\d]+\.[\d]+\.[\d]+\.[\d]+)\.(?P[\d]+)- (?P[\d]+\.[\d]+\.[\d]+\.[\d]+)\.(?P[\d]+):\s* (?P.*)$ ''', re.VERBOSE) # REM: ought to rewrite this so it works off of the 'Data' field of the above...actually, may be able to eliminate this one completely... headerStartRE = re.compile(r''' ^(?P[\d]+\.[\d]+\.[\d]+\.[\d]+)\.(?P[\d]+)- (?P[\d]+\.[\d]+\.[\d]+\.[\d]+)\.(?P[\d]+):\s* (?P.*HTTP/1.*)$ ''', re.VERBOSE) headerStopRE = re.compile('''^(\r|\n)*$''') outstandingMessages = {} printableMessages = {} messageKey = '' # the key of the 'current' message responseKey = '' # the (future) messageKey of the response to the 'current' message # Note: possible parse modes are: # 'scan' to watch for the beginning of a section; # 'header' to scan, assemble headers, and watch for the end of the header section; # 'body' prints lines from the body of the message and instructs scan() to end the body section neatly if a new section starts. parseMode = 'scan' # FIXME: should do these with constants of some kind... def parseStream(inclusionExpr, exclusionExpr): while 1: inputLine = sys.stdin.readline() if inputLine == '': return if not scan(inputLine): if parseMode == 'header': header(inputLine, inclusionExpr, exclusionExpr) if parseMode == 'body': body(inputLine) # REM: this method is way too big and has its fingers in way too many pies... 
# NOTE: output below uses single-argument, parenthesised print calls; that form
# behaves the same as a print statement (Python 2) and as the print function
# (Python 3).  A blank line is written with print("").


def _printEndpoints(key, sA, sP, dA, dP):
    """Print where a message came from and where it is going.

    key    -- 'address:port' key of the message being reported
    sA, sP -- source address (string) and port (int)
    dA, dP -- destination address (string) and port (int)

    If a first line was recorded in outstandingMessages under `key`, it is
    printed first as the request this message answers.  A blank line is
    printed after the endpoints.
    """
    if key in outstandingMessages:  # if we saw this message go out...
        # ...print what it is responding to
        print("From request: " + outstandingMessages[key])
    print("Source: %s : %d (%s)" % (sA, sP, nameFromIP(sA)))
    print("Destination: %s : %d (%s)" % (dA, dP, nameFromIP(dA)))
    print("")


def scan(inputLine):
    """Check whether inputLine starts a new tcpflow section and report it.

    Returns 1 when inputLine matches communicationsStartRE (the line has then
    been handled here), 0 otherwise (the caller should treat it as a
    continuation line of the current header or body).

    Side effects: may change the module-level parseMode, messageKey and
    responseKey, and records request lines in outstandingMessages.
    """
    global parseMode
    global messageKey
    global responseKey

    startMatch = communicationsStartRE.match(inputLine)
    if not startMatch:
        return 0

    if parseMode == 'body':
        # close out the body that was just being printed
        parseMode = 'scan'
        print("----- end body -----")
        print("")

    sA = startMatch.group('SourceAddress')
    dA = startMatch.group('DestinationAddress')
    sP = int(startMatch.group('SourcePort'))
    dP = int(startMatch.group('DestinationPort'))
    # Messages are keyed by their destination endpoint.
    messageKey = "%s:%d" % (dA, dP)

    headerMatch = headerStartRE.match(inputLine)
    if headerMatch:
        # header mode
        parseMode = 'header'
        print("--- begin header ---")
        _printEndpoints(messageKey, sA, sP, dA, dP)
        data = headerMatch.group('Data')
        print(data)
        # "> 0" is deliberate: the token must appear somewhere after the
        # first character.  Presumably this separates request lines
        # ("GET / HTTP/1.x") from status lines that begin with "HTTP/1".
        if data.find("HTTP/1") > 0:
            # The reply will be addressed to this message's source, so
            # remember the request under that key.
            responseKey = "%s:%d" % (sA, sP)
            outstandingMessages[responseKey] = data
            # Nothing is printable until a header line passes the filters.
            printableMessages[messageKey] = 0
            printableMessages[responseKey] = 0
    elif shouldPrintBody(messageKey):
        # body mode
        parseMode = 'body'
        print("---- begin body ----")
        _printEndpoints(messageKey, sA, sP, dA, dP)
        print(startMatch.group('Data'))

    return 1


def header(inputLine, inclusionExpr, exclusionExpr):
    """Handle one line while in 'header' mode.

    inputLine     -- raw line, possibly still carrying CR/LF
    inclusionExpr -- object with a match() method, or a false value to
                     disable the filter; only lines it matches are printed
    exclusionExpr -- object with a match() method, or a false value to
                     disable the filter; lines it matches are suppressed

    Both filters use match(), so they are anchored at the start of the line.
    A line matching headerStopRE ends the header section and switches
    parseMode to 'body' or back to 'scan'.
    """
    # look for transition out of headers
    global parseMode

    if headerStopRE.match(inputLine):
        print("---- end header ----")
        print("")
        if shouldPrintBody(messageKey):
            parseMode = 'body'
            print("---- begin body ----")
        else:
            parseMode = 'scan'
        return

    inputLine = inputLine.replace('\r', '').replace('\n', '')
    if inclusionExpr and not inclusionExpr.match(inputLine):
        return
    if exclusionExpr and exclusionExpr.match(inputLine):
        return

    # inputLine has passed the filters, we have A WINNER!!
    print(inputLine)
    if bodiesFlag:
        printableMessages[messageKey] = 1  # header matched, print body
    if repliesFlag:
        printableMessages[responseKey] = 1  # header matched, print reply


def body(inputLine):
    """Print one body line if the current message was marked printable.

    inputLine is printed as received, including any line terminator it
    still carries.
    """
    if shouldPrintBody(messageKey):
        print(inputLine)


def shouldPrintBody(key):
    """Return a true value when the body of the message `key` should be shown.

    That requires `key` to be flagged in printableMessages and at least one
    of bodiesFlag / repliesFlag to be set.
    """
    return (key in printableMessages
            and printableMessages[key]
            and (bodiesFlag or repliesFlag))


def nameFromIP(anIP):
    """Return the host name for anIP, or "-unknown-" if the lookup fails.

    The lookup is best effort: any ordinary error yields the placeholder.
    KeyboardInterrupt and SystemExit are deliberately NOT caught, so the
    program can still be interrupted during a slow reverse lookup.
    """
    try:
        return socket.gethostbyaddr(anIP)[0]
    except Exception:
        return "-unknown-"


if __name__ == "__main__":
    main()