#!/usr/bin/python
# -*- coding: iso-8859-1 -*-
"""Collect timestamped posts from mailboxes and write them to a text report.

Each command-line argument is either an mbox file or a directory that holds
one message per file.  Messages whose subject contains a timestamp of the
form ``NNNN.NN.NN.NNNN`` are kept; their text is stripped of quoted trailers
and signatures and written, ordered by timestamp, to ``OUTPUT_PATH``.

The code deliberately sticks to constructs that run on both Python 2.6+ and
Python 3.
"""

import email
import mailbox
import os
import re
import sys
import traceback

# Destination of the compiled report.
OUTPUT_PATH = r"C:\compiledposts.txt"

# Timestamp expected somewhere in the subject line.
_TIMESTAMP_RE = re.compile(r'\d{4}\.\d{2}\.\d{2}\.\d{4}')
# Everything from a "From -" line, an underscore rule, or a "-- " / "---"
# separator down to the end of the text.
_TRAILER_RE = re.compile("(\n+From -.*(\n.*)*$|\n+_{2,}(\n.*)*$|\n-- (\n.*)*$|\n---(\n.*)*$)")
# A first line that is immediately followed by an empty CRLF line.
_LEADING_LINE_RE = re.compile("^.*\r\n\r\n")
# NOTE(review): the original alternation for this pattern was lost (its tag
# literals were rendered as line breaks, leaving an unterminated string).
# Reconstructed as "<br>"-style and "<p>" tags -- confirm against the
# original script.
_BREAK_TAG_RE = re.compile(r"<br\s*/?>|<p>", re.IGNORECASE)
_HTML_TAG_RE = re.compile("<.*?>")

# Python 3 needs the bytes-aware parser; Python 2 only has message_from_file.
_message_from_file = getattr(email, "message_from_binary_file",
                             email.message_from_file)


def print_exc_plus2(limit=None, ffile=None, termination='\n'):
    """Write the current traceback followed by the locals of every frame.

    Variation on the ``print_exc_plus`` recipe originally written by
    Bryn Keller.

    limit       -- passed through to ``traceback.print_exc``.
    ffile       -- object with a ``write`` method; defaults to ``sys.stderr``.
    termination -- string written wherever a line break is wanted.

    When no exception is being handled a short notice is written instead of
    failing on the missing traceback.
    """
    if ffile is None:
        ffile = sys.stderr
    tb = sys.exc_info()[2]
    if tb is None:
        ffile.write("No exception is currently being handled" + termination)
        return
    # Walk to the innermost traceback entry: the frame that raised.
    while tb.tb_next:
        tb = tb.tb_next
    # Collect that frame and all of its callers, then put outermost first.
    stack = []
    frame = tb.tb_frame
    while frame:
        stack.append(frame)
        frame = frame.f_back
    stack.reverse()
    traceback.print_exc(limit, ffile)
    ffile.write("Locals by frame, innermost last" + termination)
    for frame in stack:
        ffile.write(termination)
        ffile.write("Frame %s in %s at line %s" % (frame.f_code.co_name,
                                                    frame.f_code.co_filename,
                                                    frame.f_lineno))
        ffile.write(termination)
        for key, value in frame.f_locals.items():
            ffile.write(termination)
            ffile.write("\t%20s = " % key)
            # str() runs arbitrary user code; a failure there must not hide
            # the error that is being reported.
            try:
                ffile.write(str(value))
            except Exception:
                # Placeholder text used by the print_exc_plus recipe.
                ffile.write("<ERROR WHILE PRINTING VALUE>")
        ffile.write(termination)
    ffile.write(termination)


def nextMsg(box):
    """Yield the messages stored at *box* as ``email`` message objects.

    box -- path to an mbox file, or to a directory with one message per file.

    A path that does not exist yields nothing.  Directory entries are visited
    in sorted name order and entries that are not regular files are skipped.
    """
    path = os.path.normpath(box)
    if not os.path.exists(path):
        return
    if os.path.isfile(path):
        mbox = mailbox.mbox(path, create=False)
        try:
            for msg in mbox:
                yield msg
        finally:
            # Runs even when the caller abandons the generator early.
            mbox.close()
    else:
        print("I think it's from O/OE")
        for name in sorted(os.listdir(path)):
            entry = os.path.join(path, name)
            if not os.path.isfile(entry):
                continue
            # The parser reads the whole file, so it can be closed before
            # the message is handed to the caller.
            with open(entry, "rb") as handle:
                msg = _message_from_file(handle)
            yield msg


def _payload_text(part):
    """Return the transfer-decoded payload of *part* as a native string."""
    payload = part.get_payload(decode=True)
    if payload is None:
        return ''
    if isinstance(payload, str):
        # Python 2: the decoded payload is already a native string.
        return payload
    charset = part.get_content_charset() or 'latin-1'
    try:
        return payload.decode(charset, 'replace')
    except LookupError:
        # Charset name unknown to Python; latin-1 can decode any byte.
        return payload.decode('latin-1')


def _extract_body(index, msg):
    """Return the text of *msg*, printing which representation was used.

    The first text/plain part wins.  Otherwise the last text/html part is
    used with its tags removed, and failing that the whole message.
    """
    body = ''
    for part in msg.walk():
        content_type = part.get_content_type()
        if content_type == "text/plain":
            body = _payload_text(part)
            print("%d text/plain" % index)
            break
        if content_type == "text/html":
            body = _payload_text(part)
            body = _BREAK_TAG_RE.sub("\r\n", body)
            body = _HTML_TAG_RE.sub("", body)
            print("%d text/html" % index)
    else:
        # No text/plain part was found; fall back only if no HTML either.
        if not body:
            body = str(msg)
            print("%d full message" % index)
    return body


def _clean_body(body):
    """Strip the leading line and trailing quoted material from *body*."""
    text = _LEADING_LINE_RE.sub("", body)
    text = _TRAILER_RE.sub("", text.strip())
    return text.strip()


def _collect_posts(box):
    """Return a dict mapping each subject timestamp to its cleaned bodies."""
    posts = {}
    for index, msg in enumerate(nextMsg(box)):
        subject = msg['subject']
        if subject is None:
            # A message without a Subject header cannot carry a timestamp.
            continue
        match = _TIMESTAMP_RE.search(str(subject))
        if match is None:
            continue
        body = _clean_body(_extract_body(index, msg))
        posts.setdefault(match.group(0), []).append(body)
    return posts


def _open_report(path):
    """Open *path* for writing text on either major Python version."""
    if sys.version_info[0] >= 3:
        # Bodies are unicode on Python 3; pick an encoding that cannot fail.
        return open(path, "w", encoding="utf-8", errors="replace")
    return open(path, "w")


def _write_report(posts, path):
    """Write *posts* (timestamp -> list of bodies) to *path*."""
    stamps = sorted(posts)
    with _open_report(path) as out:
        # The count is the number of distinct timestamps.
        out.write("Number of posts: " + str(len(stamps)))
        if stamps:
            out.write("\nFirst Timestamp: " + stamps[0] +
                      "\nLast Timestamp: " + stamps[-1])
        out.write("\n")
        for stamp in stamps:
            for body in posts[stamp]:
                out.write(body)
                out.write('\n\n------\n\n')


def _main(argv):
    """Process every mailbox named in *argv* (after the program name)."""
    if len(argv) < 2:
        return
    try:
        for box in argv[1:]:
            # NOTE(review): every argument rewrites OUTPUT_PATH, so only the
            # last mailbox's posts survive -- confirm that this is intended.
            _write_report(_collect_posts(box), OUTPUT_PATH)
    except Exception:
        print_exc_plus2()


if __name__ == "__main__":
    _main(sys.argv)