[[[
Add the --skip-regexps option, and drive it.

(There are also some debug statements, because this all started with
trying to figure out why dcsops-dev@ wasn't getting into my addresses.
This fix doesn't work, and anyway I'm not sure that it's the right
approach.)
]]]

Index: mailaprop.py
===================================================================
--- mailaprop.py	(revision 5638)
+++ mailaprop.py	(working copy)
@@ -288,6 +288,8 @@
         sending to it; otherwise just increase its regular count."""
         key_addr = addr.lower()  # canonicalize the key, as usual
         # Sanity check -- this should never fail.
+        if addr.find("dcsops-dev@") != -1:
+            sys.stderr.write("DEBUG: take(): in update(): '%s <%s>'\n" % (name, addr))
         if self.key_addr != key_addr:
             raise AddressDifference("'%s' and '%s' differ by more than case"
                                     % (self.addr, other_ah.addr))
@@ -331,6 +333,8 @@
             # it matches more than once, something is wrong, and we
             # will raise a FullAddressDuplication exception.
             matched = False
+            if addr.find("dcsops-dev@") != -1:
+                sys.stderr.write("DEBUG: take(): past first test in update(): '%s <%s>'\n" % (name, addr))
             for other_full_addr in self.full_addrs.keys():
                 # If they differ only by case, pick the better one or combine.
                 if candidate_full_addr.lower() == other_full_addr.lower():
@@ -351,6 +355,17 @@
                     new_full_addr  = self.make_full_addr(new_addr, new_name)
                     new_sent_count = other_sent_count + incr_sent
                     new_recv_count = other_recv_count + incr_recv
+                    if addr.find("dcsops-dev@") != -1:
+                        sys.stderr.write("DEBUG: take(): update(): start del seq:\n")
+                        sys.stderr.write("       candidate:     '%s'\n" % candidate_full_addr)
+                        sys.stderr.write("       other_fa:      '%s'\n" % other_full_addr)
+                        sys.stderr.write("       new_fa:        '%s'\n" % new_full_addr)
+                        sys.stderr.write("       new_name:      '%s'\n" % new_name)
+                        sys.stderr.write("       new_addr:      '%s'\n" % new_addr)
+                        sys.stderr.write("       new_date:      '%s'\n" % new_date)
+                        sys.stderr.write("       new_sent_ct:   '%s'\n" % new_sent_count)
+                        sys.stderr.write("       new_recv_ct:   '%s'\n" % new_recv_count)
+                        sys.stderr.write("       new_full_addr: '%s'\n" % new_full_addr)
                     del self.full_addrs[other_full_addr]
                     if self.full_addrs.has_key(candidate_full_addr):
                         del self.full_addrs[candidate_full_addr]
@@ -357,6 +372,8 @@
                     self.full_addrs[new_full_addr] = \
                         [new_date, new_sent_count, new_recv_count,]
             if not matched:
+                if addr.find("dcsops-dev@") != -1:
+                    sys.stderr.write("DEBUG: take(): update(): not matched '%s <%s>'\n" % (name, addr))
                 self.full_addrs[candidate_full_addr] = \
                     [date, incr_sent, incr_recv,]
 
@@ -371,15 +388,24 @@
         key_addr = addr.lower()
         ah = self.get(key_addr)
         if ah is None:
+            if addr.find("dcsops-dev@") != -1:
+                sys.stderr.write("DEBUG: take(): ah is None: '%s <%s>'\n" % (name, addr))
             self[key_addr] = AddressHistory(name, addr, date, sent_to)
         else:
+            if addr.find("dcsops-dev@") != -1:
+                sys.stderr.write("DEBUG: take(): about to update(): '%s <%s>'\n" % (name, addr))
             ah.update(name, addr, date, sent_to)
 
 
 reversed_unquoted_name_re = re.compile("([^, ]+), +([^, ]+)")
 
-def absorb_message(msg, addresses, restricteds):
+def absorb_message(msg, addresses, skip_regexps, restricteds):
     """File email.Message MSG into AddressBook ADDRESSES appropriately.
+
+SKIP_REGEXPS is a list of compiled regular expressions.  Any address
+that matches any of them (tested with re.match, i.e. anchored at the
+start of the address) is skipped: it is not placed into ADDRESSES.
+
 RESTRICTEDS is a nested dictionary whose keys are lower-cased raw
 email addresses and whose values are subdictionaries, with each
 subdictionary's keys being the lower-cased permissible names (the values
@@ -391,6 +417,8 @@
     bccs = msg.get_all('bcc', [ ])
     raw_date = msg.get_all('date', None)
     for name, addr in email.Utils.getaddresses(froms + tos + ccs + bccs):
+        if addr.find("dcsops-dev@") != -1:
+            sys.stderr.write("DEBUG: found '%s <%s>'\n" % (name, addr))
         # Certain special cases can be eliminated right out of the gate.
         if (name.find("via StreetEasy") >= 0 or addr.find("via StreetEasy") >= 0
             # Anyone named Viagra has already changed their name by now, right?
@@ -411,6 +439,8 @@
             or addr.find("@unknown.email") >= 0
             or addr.find("notify@twitter.com") >= 0
             or addr.find("@postmaster.twitter.com") >= 0):
+            if addr.find("dcsops-dev@") != -1:
+                sys.stderr.write("DEBUG: continued here '%s <%s>'\n" % (name, addr))
             continue
         # Clean up the name.
         name = \
@@ -468,9 +498,13 @@
         sent_to = False
         for sender_addr in froms:
             if sender_addr.find("kfogel@") != -1:
+                if addr.find("dcsops-dev@") != -1:
+                    sys.stderr.write("DEBUG: sent to '%s <%s>'\n" % (name, addr))
                 sent_to = True
                 break
         # Okay, ready for prime time.
+        if addr.find("dcsops-dev@") != -1:
+            sys.stderr.write("DEBUG: about to test restricteds: '%s <%s>'\n" % (name, addr))
         if (restricteds.has_key(addr.lower())
             and not restricteds[addr.lower()].has_key(name.lower())):
             sys.stderr.write("DEBUG: restricted out: '%s <%s>'\n"
@@ -477,7 +511,12 @@
                              % (name, addr))
             pass
         elif addr.find("@") != -1:
-            addresses.take(addr, name, date, sent_to)
+            if addr.find("dcsops-dev@") != -1:
+                sys.stderr.write("DEBUG: about to take: '%s <%s>'\n" % (name, addr))
+            if not any(skip_re.match(addr) for skip_re in skip_regexps):
+                addresses.take(addr, name, date, sent_to)
+            else:
+                sys.stderr.write("DEBUG: skip_re match: '%s'\n" % addr)
 
 
 def main():
@@ -494,9 +533,14 @@
     # See absorb_message() for the format of this dictionary.
     restricteds = {}
 
+    # List of compiled regular expressions.  If an address matches any
+    # of these, it is skipped; see absorb_message() for details.
+    skip_regexps = []
+
     try:
         (opts, args) = getopt.getopt(sys.argv[1:], "",
-                                     [ "restricteds=", ])
+                                     [ "restricteds=", 
+                                       "skip-regexps=",])
     except getopt.GetoptError, err:
         sys.stderr.write(str(err))
         sys.stderr.write("\n")
@@ -521,6 +565,11 @@
             ignored_directories.append(optarg)
         elif opt in ("--ignore-contained"):
             ignored_if_containing.append(optarg)
+        elif opt in ("--skip-regexps"):
+            with open(optarg) as f:
+                skip_regexps = [re.compile(x) for x in f.readlines()] 
+            for r in skip_regexps:
+                sys.stderr.write("DEBUG: skip regexp: '%s'\n" % r)
   
     if len(args) < 1:
         roots = (".",)
@@ -535,7 +584,7 @@
         if msg_start_re.match(line):
             if msg_str:
                 msg = p.parsestr(msg_str)
-                absorb_message(msg, addresses, restricteds)
+                absorb_message(msg, addresses, skip_regexps, restricteds)
             msg_str = line
         else:
             msg_str += line
@@ -543,7 +592,7 @@
     # Polish off the last message.
     if msg_str:
         msg = p.parsestr(msg_str)
-        absorb_message(msg, addresses, restricteds)
+        absorb_message(msg, addresses, skip_regexps, restricteds)
     # Print out the elisp core.
     def elisp_addr(ah):
         """Return the Elisp expression for AddressHistory AH."""
Index: run-mailaprop.sh
===================================================================
--- run-mailaprop.sh	(revision 5638)
+++ run-mailaprop.sh	(working copy)
@@ -60,8 +60,11 @@
   MAILDIR=${HOME}/mail
   
   echo "Parsing mboxes..."
-  find ${MAILDIR} -type f -regex ".*/[0-9]+$\|.*outwent" -print \
-    | xargs cat | ${MAILAPROP} >> ${OUTFILE_TMP}
+  find ${MAILDIR} -type f -regex ".*/[0-9]+$\|.*outwent" -print      \
+    | xargs cat                                                      \
+    | ${MAILAPROP}                                                   \
+      --skip-regexps ${HOME}/private/mailaprop/problem-addresses.txt \
+      >> ${OUTFILE_TMP}
   echo "Done."
 fi