X-Git-Url: http://club.cc.cmu.edu/~cmccabe/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=snarf_mail.rb;h=76c831ce8c0d2b521e1334d0b18cb3b9f0a19fc0;hb=2c1e447111e0043adcb0a8c8418a6ee02e82e8e1;hp=a2490b1057b973af3f06376e66edcd7945917c97;hpb=19e1e2d6b1fdd542c89c4a3e31bcf08bebbe0215;p=cmccabe-bin diff --git a/snarf_mail.rb b/snarf_mail.rb index a2490b1..76c831c 100755 --- a/snarf_mail.rb +++ b/snarf_mail.rb @@ -9,6 +9,7 @@ # http://ruby-doc.org/stdlib/libdoc/net/imap/rdoc/index.html # +require 'date' require 'net/imap' require 'optparse' require 'ostruct' @@ -17,11 +18,16 @@ class MyOptions def self.parse(args) opts = OpenStruct.new opts.mailboxes = Array.new + opts.delete = "none" # Fill in $opts values parser = OptionParser.new do |myparser| myparser.banner = "Usage: #{ File.basename($0) } [opts]" myparser.separator("Specific options:") + myparser.on("--delete POLICY", "-d", + "Set delete policy to 'none' or 'old'. Default is 'none'.") do |d| + opts.delete = d + end myparser.on("--username USERNAME", "-u", "Email account to fetch. (example: \ RareCactus@gmail.com)") do |u| @@ -84,27 +90,78 @@ def format_uid(uid) return sprintf("%006d", uid) end +def format_date(date) + date.gsub!(' ', '_') +end + +def get_sanitized_email_name(mailbox, arr) + msn = mailbox.dup + msn.gsub!(' ', '_') + msn.gsub!('/', '.') + return "#{msn}_#{format_date(arr["INTERNALDATE"])}_#{format_uid(arr["UID"])}" +end + +def write_email_to_disk(mailbox, data) + arr = data[0].attr + filename = get_sanitized_email_name(mailbox, arr) + fp = File.open(filename, 'w') + fp.write(arr["RFC822.HEADER"]) + fp.write(arr["RFC822.TEXT"]) + fp.close +end + def snarf_mailbox(imap, mailbox) - imap.select(mailbox) - count = 0 - total_count = 0 - imap.search(["NOT", "DELETED"]).each do |message_id| - data = imap.fetch(message_id, [ "UID", "RFC822.HEADER", "RFC822.TEXT" ]) - a = data[0].attr - filename = "#{mailbox}#{format_uid(a["UID"])}" - fp = File.open(filename, 'w') - fp.write(a["RFC822.HEADER"]) - fp.write(a["RFC822.TEXT"]) - fp.close - count = count + 1 - total_count = total_count + 1 - if (count > 10) then - count = 0 - printf(".") - STDOUT.flush() + full_count = 0 + first_time = true + + searchterms = [ "NOT", "DELETED" ] + if $opts.delete == "old" + t = Date.today() - 365 + time_str = t.strftime("%e-%b-%Y") + searchterms << "BEFORE" << time_str + prequel = "fetched and deleted: " + elsif $opts.delete == "none" + prequel = "fetched: " + else + raise "expected one of 'old', 'none' for delete argument." + end + + while true + count = 0 + msg_seqnos = Array.new + + imap.select(mailbox) + imap.search(searchterms).each do |message_id| + if (first_time == true) then + # Print a dot immediately after making first contact with the server. + # It is reassuring to the user. + printf(".") + STDOUT.flush() + first_time = false + end + data = imap.fetch(message_id, + [ "INTERNALDATE", "UID", "RFC822.HEADER", "RFC822.TEXT" ]) + write_email_to_disk(mailbox, data) + count = count + 1 + full_count = full_count + 1 + msg_seqnos << data[0].seqno.to_i + #break if (count > 20) + end + if (count == 0) then + puts "#{prequel} #{full_count} messages from #{mailbox}" + return + end + + # Print out a dot to signify progress + printf(".") + STDOUT.flush() + + if ($opts.delete != "none"): + # Delete messages + imap.store(msg_seqnos, "+FLAGS", [:Deleted]) + imap.expunge end end - puts "fetched #{total_count} messages from #{mailbox}" end # MAIN