#!/usr/bin/python
# -*- coding: iso-8859-1 -*-
import mailbox, email, re, sys, os, traceback, sys
def print_exc_plus2(limit=None, ffile=None, termination='\n'):
    """Write the current traceback, then the local variables of every frame.

    Sort of upgrade from the recipe print_exc_plus originally written by
    Bryn Keller.

    Meant to be called from inside an ``except`` block.  The standard
    traceback is written first, followed by one section per stack frame
    (outermost first, innermost last) listing each local name and value.

    Args:
        limit: Passed through to ``traceback.print_exc`` to cap the number
            of traceback entries; ``None`` means no cap.
        ffile: Object with a ``write(str)`` method that receives the report.
            Defaults to ``sys.stderr``.
        termination: Line terminator appended after each piece of the
            locals report.

    Returns:
        None.
    """
    if ffile is None:
        ffile = sys.stderr

    tb = sys.exc_info()[2]
    if tb is None:
        # Called with no exception in flight: there is no traceback to walk.
        ffile.write("No exception is currently being handled" + termination)
        return

    # Advance to the innermost traceback entry (where the exception was raised).
    while tb.tb_next:
        tb = tb.tb_next

    # Collect that frame and all of its callers, then flip the list so the
    # outermost caller is reported first.
    stack = []
    frame = tb.tb_frame
    while frame:
        stack.append(frame)
        frame = frame.f_back
    stack.reverse()

    traceback.print_exc(limit, ffile)
    ffile.write("Locals by frame, innermost last" + termination)
    for frame in stack:
        ffile.write(termination)
        ffile.write("Frame %s in %s at line %s" % (frame.f_code.co_name,
                                                   frame.f_code.co_filename,
                                                   frame.f_lineno))
        ffile.write(termination)
        # Snapshot the items: str() below runs arbitrary code, which must not
        # be able to change the mapping while it is being iterated.
        for key, value in list(frame.f_locals.items()):
            ffile.write(termination)
            ffile.write("\t%20s = " % key)
            try:
                ffile.write(str(value))
            except Exception:
                # Best effort: one value that cannot be rendered must not
                # abort the report, but the failure should be visible.
                ffile.write("<ERROR WHILE PRINTING VALUE>")
            ffile.write(termination)
        ffile.write(termination)
def nextMsg(box):
    """Yield the e-mail messages stored at *box*, one at a time.

    Args:
        box: Filesystem path.  A regular file is read as an mbox mailbox;
            anything else that exists is treated as a directory holding one
            message per file.

    Yields:
        ``email.message.Message`` objects (``mailbox.mboxMessage`` instances
        for the mbox case).  Nothing is yielded when *box* does not exist.
    """
    path = os.path.normpath(box)
    if not os.path.exists(path):
        return

    if os.path.isfile(path):
        # create=False: the path was just checked, and a missing file must
        # never be created as a side effect of reading.
        mbox = mailbox.mbox(path, create=False)
        try:
            for msg in mbox:
                yield msg
        finally:
            # Runs even when the consumer abandons the generator early.
            mbox.close()
    else:
        print("I think it's from O/OE")
        # Sorted so the order does not depend on os.listdir's arbitrary order.
        for name in sorted(os.listdir(path)):
            entry = os.path.join(path, name)
            if not os.path.isfile(entry):
                # Sub-directories cannot be parsed as messages.
                continue
            # The files are only ever read, so open them read-only.
            with open(entry, "r") as handle:
                yield email.message_from_file(handle)
# Report file written by the script.
_OUTPUT_PATH = r"C:\compiledposts.txt"

# Token searched for in each subject line; it is the grouping/sort key and is
# reported as a "Timestamp" (presumably YYYY.MM.DD.HHMM -- confirm).
_STAMP_RE = re.compile(r'\d{4}\.\d{2}\.\d{2}\.\d{4}')
# From the first "From -" line, run of underscores, "-- " line or "---" line
# through to the end of the text.
_TRAILER_RE = re.compile("(\n+From -.*(\n.*)*$|\n+_{2,}(\n.*)*$|\n-- (\n.*)*$|\n---(\n.*)*$)")
# First line of the text when it is followed by an empty CRLF line.
_LEAD_RE = re.compile("^.*\r\n\r\n")
# NOTE(review): the original pattern for this expression did not survive in
# the source (the literal was broken across lines).  Reconstructed here as
# "<br>", "<br/>" and "<br />" in any letter case -- confirm against the
# intended list of tags.
_BREAK_TAG_RE = re.compile(r"<br\s*/?>", re.IGNORECASE)
# Any remaining markup tag.
_TAG_RE = re.compile("<.*?>")


def _payload_text(part):
    """Return the transfer-decoded payload of *part* as a native string."""
    payload = part.get_payload(decode=True)
    if isinstance(payload, bytes) and not isinstance(payload, str):
        # Python 3 returns bytes here; str() on them would give "b'...'".
        charset = part.get_content_charset() or "iso-8859-1"
        try:
            return payload.decode(charset, "replace")
        except LookupError:
            # Charset name not known to Python.
            return payload.decode("iso-8859-1", "replace")
    return str(payload)


def _extract_body(msg, index):
    """Return the text of *msg*, echoing which source was used for *index*."""
    body = ''
    for part in msg.walk():
        # get_content_type() reports "text/plain" for a part without a
        # Content-Type header, so such parts are used as plain text.
        content_type = part.get_content_type()
        if content_type == "text/plain":
            body = _payload_text(part)
            print("%d text/plain" % index)
            break
        elif content_type == "text/html":
            # Keep scanning: a later text/plain part still takes precedence.
            body = _BREAK_TAG_RE.sub("\r\n", _payload_text(part))
            body = _TAG_RE.sub("", body)
            print("%d text/html" % index)
    else:
        # No text/plain part ended the scan; fall back to the whole message
        # unless an HTML part already supplied a body.
        if not body:
            body = str(msg)
            print("%d full message" % index)
    return body


def _collect_posts(sources):
    """Return {stamp: [post text, ...]} for every stamped message in *sources*."""
    posts = {}
    for source in sources:
        for index, msg in enumerate(nextMsg(source)):
            # A message without a Subject header yields None here.
            found = _STAMP_RE.search(str(msg['subject'] or ''))
            if found is None:
                continue
            text = _LEAD_RE.sub("", _extract_body(msg, index))
            text = _TRAILER_RE.sub("", text.strip())
            posts.setdefault(found.group(0), []).append(text.strip())
    return posts


def _write_report(posts, path):
    """Write the posts in *posts* to *path*, ordered by stamp."""
    stamps = sorted(posts)
    with open(path, "w") as out:
        if not stamps:
            # Nothing matched: there is no first/last stamp to report.
            out.write("Number of posts: 0\n")
            return
        # The count is the number of distinct stamps, as in the original.
        out.write("Number of posts: " + str(len(stamps)) +
                  "\nFirst Timestamp: " + stamps[0] +
                  "\nLast Timestamp: " + stamps[-1] + "\n")
        for stamp in stamps:
            for post in posts[stamp]:
                out.write(post)
                out.write('\n\n------\n\n')


if __name__ == "__main__":
    # Usage: script MAILBOX [MAILBOX ...]
    # Posts from all mailboxes are merged into a single report; writing the
    # report once per mailbox would leave only the last one on disk.
    try:
        if len(sys.argv) < 2:
            sys.exit()
        _write_report(_collect_posts(sys.argv[1:]), _OUTPUT_PATH)
    except Exception:
        # "except Exception" lets the SystemExit from sys.exit() through.
        print_exc_plus2()