#! /usr/bin/env python
# -*- coding: utf-8; mode: python -*-

# Copyright (C) 2006-2010 Joel Rosdahl <joel@rosdahl.net>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301, USA.

u"""maildirproc -- maildir processor

http://joel.rosdahl.net/maildirproc/

maildirproc is a small program that processes one or several existing
mail boxes in the maildir format. It is primarily focused on mail
sorting -- i.e., moving, copying, forwarding and deleting mail
according to a set of rules. It can be seen as an alternative to
procmail, but instead of being a delivery agent (which wants to be
part of the delivery chain), maildirproc only processes already
delivered mail. And that's a feature, not a bug.
"""


import hashlib
import locale
import os
import random
import re
import shutil
import socket
import subprocess
import sys
import time
from email import errors as email_errors
from email import header as email_header
from email import parser as email_parser
from optparse import OptionParser
from io import open


# Python 2/3 compatibility shims.
if sys.version_info[0] >= 3:
    # Python 3 has neither "basestring" nor "unicode"; all text is "str".
    basestring = str
    unicode = str
else:
    def ascii(x):
        u"""Return repr(x) without the Python 2 "u" string prefix.

        Stands in for the ascii() builtin that only exists in Python 3.
        """
        # We don't want the "u" prefix in repr'd strings in Python 2.x.
        r = repr(x)
        if r.startswith(u"u"):
            return r[1:]
        else:
            return r


def offset_to_timezone(offset):
    u"""Render a UTC offset as a "+HHMM" or "-HHMM" string.

    offset is a number of seconds where zero and negative values are
    rendered with a "+" sign and positive values with a "-" sign (the
    "seconds west of UTC" convention of time.timezone/time.altzone).
    """
    sign = u"-" if offset > 0 else u"+"
    hours, remainder = divmod(abs(offset), 3600)
    return u"{0}{1:0>2}{2:0>2}".format(sign, hours, remainder // 60)


def iso_8601_now():
    u"""Return the current local time as "YYYY-MM-DD HH:MM:SS.mmm +HHMM"."""
    now = time.time()
    local = time.localtime(now)
    milliseconds = int(1000 * (now - int(now)))
    # time.altzone is only the correct offset while daylight saving time
    # is in effect; otherwise the standard offset (time.timezone) applies.
    if time.daylight and local.tm_isdst > 0:
        offset = time.altzone
    else:
        offset = time.timezone
    return u"{0}.{1:0>3} {2}".format(
        time.strftime(u"%Y-%m-%d %H:%M:%S", local),
        milliseconds,
        offset_to_timezone(offset))


def sha1sum(fp):
    u"""Return the hexadecimal SHA-1 digest of everything readable from fp.

    The file object is consumed in 4096-byte reads until a read returns
    an empty result.
    """
    digest = hashlib.sha1()
    chunk = fp.read(4096)
    while chunk:
        digest.update(chunk)
        chunk = fp.read(4096)
    return digest.hexdigest()


def safe_write(fp, s):
    u"""Write s followed by a newline to fp, surviving encoding errors.

    If fp raises UnicodeEncodeError for the text, an ASCII-only escaped
    representation of s (as produced by ascii()) is written instead.
    """
    try:
        fp.write(s + u"\n")
    except UnicodeEncodeError:
        # Escape only the text itself. Passing the line terminator
        # through ascii() would turn it into a literal backslash-n and
        # leave the output line unterminated.
        fp.write(ascii(s) + u"\n")


class MailHeader(object):
    u"""The text of one named header of a mail, with logged predicates.

    Instances compare equal to whatever their text compares equal to.
    """

    def __init__(self, mail, name, text):
        self._mail = mail
        self._name = name
        self._text = text

    def __eq__(self, x):
        return self._text == x

    def __ne__(self, x):
        return not self.__eq__(x)

    def __str__(self):
        return self._text

    def __repr__(self):
        return repr(self._text)

    def contains(self, string):
        u"""Case-insensitive substring test; the outcome is debug-logged."""
        found = string.lower() in self._text.lower()
        verdict = u"contains" if found else u"does not contain"
        self._mail.processor.log_debug(
            u"... Header \"{0}\" {1} {2}".format(
                self._name, verdict, ascii(string)))
        return found

    def matches(self, regexp):
        u"""Search the header text for regexp; the outcome is debug-logged.

        The search ignores case and uses multi-line, Unicode-aware
        matching. Returns the match object, or None when nothing matched.
        """
        flags = re.IGNORECASE | re.MULTILINE | re.UNICODE
        match = re.search(regexp, self._text, flags)
        verdict = u"matches" if match else u"does not match"
        self._mail.processor.log_debug(
            u"... Header \"{0}\" {1} {2}".format(
                self._name, verdict, ascii(regexp)))
        return match


class MailTarget(object):
    u"""Predicates evaluated over the "to" and "cc" headers of a mail."""

    _target_headers = [u"to", u"cc"]

    def __init__(self, mail):
        self._mail = mail

    def contains(self, string):
        u"""Return True if any target header contains string (logged)."""
        found = self._helper(u"contains", string)
        verdict = u"contains" if found else u"does not contain"
        self._mail.processor.log_debug(
            u"... Target {0} {1}".format(verdict, ascii(string)))
        return found

    def matches(self, regexp):
        u"""Return True if any target header matches regexp (logged)."""
        found = self._helper(u"matches", regexp)
        verdict = u"matches" if found else u"does not match"
        self._mail.processor.log_debug(
            u"... Target {0} {1}".format(verdict, ascii(regexp)))
        return found

    # ----------------------------------------------------------------

    def _helper(self, method_name, arg):
        # Apply the named header predicate to each target header in
        # turn, stopping at the first one that succeeds.
        return any(
            getattr(self._mail[name], method_name)(arg)
            for name in self._target_headers)


class MailBase(object):
    u"""Header access and logging shared by all mail objects.

    On construction the headers of the mail file are parsed and, if the
    file could be opened, a summary of the mail is logged. The mail
    actions themselves (copy, delete, forward, move) are not defined
    here.
    """

    def __init__(self, processor, maildir, mail_path):
        self._processor = processor
        self._maildir = maildir
        self._path = mail_path
        self._target = MailTarget(self)
        # Maps lower-cased header names to their decoded text.
        self._headers = {}
        if self._parse_mail():
            self._log_processing()

    @property
    def maildir(self):
        return self._maildir

    @property
    def path(self):
        return self._path

    @property
    def processor(self):
        return self._processor

    @property
    def target(self):
        return self._target

    def __getitem__(self, header_name):
        u"""Return a MailHeader for header_name (case-insensitive).

        A header that is not present yields a MailHeader with empty text.
        """
        return MailHeader(
            self, header_name, self._headers.get(header_name.lower(), u""))

    def from_mailing_list(self, list_name):
        u"""Return True if one of the list headers contains list_name."""
        list_name = list_name.lower()
        for headername in [
                u"delivered-to", u"mailing-list", u"x-beenthere",
                u"x-mailing-list"]:
            if self[headername].contains(list_name):
                self._processor.log_debug(
                    u"... Mail is on mailing list {0}".format(list_name))
                return True
        self._processor.log_debug(
            u"... Mail is not on mailing list {0}".format(list_name))
        return False

    # ----------------------------------------------------------------

    def _log_processing(self):
        # Log the checksum of the mail file and its most important headers.
        try:
            fp = open(self.path, u"rb")
        except IOError as e:
            # The file was probably (re)moved by some other process.
            self._processor.log_mail_opening_error(self.path, e)
            return
        try:
            digest = sha1sum(fp)
        finally:
            # Don't leak one file handle per processed mail.
            fp.close()
        self._processor.log(u"SHA1:       {0}".format(ascii(digest)))
        for name in u"Message-ID Subject Date From To Cc".split():
            self._processor.log(
                u"{0:<11} {1}".format(name + u":", ascii(self[name])))

    def _parse_mail(self):
        # Parse the headers of the mail file into self._headers. Returns
        # False if the file could not be opened, otherwise True.

        # We'll just use some encoding that handles all byte values
        # without bailing out. Non-ASCII characters should not exist
        # in the headers according to email standards, but if they do
        # anyway, we mustn't crash.
        encoding = u"iso-8859-1"

        self._processor.log(u"")
        self._processor.log(u"New mail detected at {0}:".format(iso_8601_now()))
        self._processor.log(u"Path:       {0}".format(ascii(self.path)))
        try:
            fp = open(self.path, encoding=encoding)
        except IOError as e:
            # The file was probably (re)moved by some other process.
            self._processor.log_mail_opening_error(self.path, e)
            return False
        try:
            headers = email_parser.Parser().parse(fp, headersonly=True)
        finally:
            fp.close()
        for name in headers.keys():
            value_parts = []
            for header in headers.get_all(name, []):
                try:
                    for (s, c) in email_header.decode_header(header):
                        # email.header.decode_header in Python 3.x may
                        # return either [(str, None)] or [(bytes,
                        # None), ..., (bytes, encoding)]. We must
                        # compensate for this. Checking for bytes
                        # (rather than "not unicode") works on both
                        # Python 2 and 3.
                        if isinstance(s, bytes):
                            s = s.decode(c if c else u"ascii")
                        value_parts.append(s)
                except (email_errors.HeaderParseError, LookupError,
                        ValueError):
                    # LookupError: unknown charset; ValueError covers
                    # undecodable bytes (UnicodeDecodeError).
                    self._processor.log_error(
                        u"Error: Could not decode header {0}".format(
                            ascii(header)))
                    value_parts.append(header)
            self._headers[name.lower()] = u" ".join(value_parts)
        return True


class DryRunMail(MailBase):
    u"""Mail whose actions are only logged, never carried out."""

    def copy(self, maildir):
        self._processor.log(u"==> Copying to {0}".format(maildir))

    def delete(self):
        self._processor.log(u"==> Deleting")

    def forward(self, addresses, env_sender=None):
        self._forward(True, addresses, env_sender)

    def forward_copy(self, addresses, env_sender=None):
        self._forward(False, addresses, env_sender)

    def move(self, maildir):
        self._processor.log(u"==> Moving to {0}".format(maildir))

    # ----------------------------------------------------------------

    def _forward(self, delete, addresses, env_sender):
        # Log what a forward would do. "addresses" may be one address
        # string or an iterable of address strings.
        if isinstance(addresses, basestring):
            addresses = [addresses]
        else:
            addresses = list(addresses)
        copy = u"" if delete else u" copy"
        if env_sender is not None:
            # The closing parenthesis was missing from this message.
            sender_note = u" (envelope sender: {0})".format(env_sender)
        else:
            sender_note = u""
        self._processor.log(
            u"==> Forwarding{0} to {1!r}{2}".format(
                copy, addresses, sender_note))


class Mail(MailBase):
    u"""Mail whose actions are actually carried out."""

    def copy(self, maildir):
        u"""Copy the mail to maildir (relative to the maildir base)."""
        self._processor.log(u"==> Copying to {0}".format(maildir))
        self._copy(maildir)

    def delete(self):
        u"""Remove the mail file."""
        self._processor.log(u"==> Deleting")
        self._delete()

    def forward(self, addresses, env_sender=None):
        u"""Pipe the mail to sendmail for addresses, then delete it."""
        self._forward(True, addresses, env_sender)

    def forward_copy(self, addresses, env_sender=None):
        u"""Pipe the mail to sendmail for addresses and keep it."""
        self._forward(False, addresses, env_sender)

    def move(self, maildir):
        u"""Move the mail to maildir (relative to the maildir base)."""
        self._processor.log(u"==> Moving to {0}".format(maildir))
        self._processor.rename(self.path, self._new_path_in(maildir))

    # ----------------------------------------------------------------

    def _new_path_in(self, maildir):
        # Build a fresh, unique path for this mail inside maildir,
        # keeping its subdirectory (new/cur) and its flags.
        flagpart = self._get_flagpart()
        return os.path.join(
            self._processor.maildir_base,
            maildir,
            self.path.split(os.sep)[-2], # new/cur
            self._processor.create_maildir_name() + flagpart)

    def _copy(self, maildir):
        # Write the copy into the target's tmp directory first and then
        # rename it into place.
        try:
            source_fp = open(self.path, u"rb")
        except IOError as e:
            # The file was probably (re)moved by some other process.
            self._processor.log_mail_opening_error(self.path, e)
            return

        tmp_target = os.path.join(
            self._processor.maildir_base,
            maildir,
            u"tmp",
            self._processor.create_maildir_name())
        # "with" makes sure the source is closed on every exit path.
        with source_fp:
            try:
                # os.open reports failure with OSError, which is not an
                # IOError on Python 2.
                tmp_fd = os.open(
                    tmp_target, os.O_WRONLY | os.O_CREAT | os.O_EXCL)
            except (IOError, OSError) as e:
                self._processor.log_io_error(
                    u"Could not open {0} for writing".format(tmp_target),
                    e)
                return
            try:
                with os.fdopen(tmp_fd, u"wb") as tmp_target_fp:
                    shutil.copyfileobj(source_fp, tmp_target_fp)
            except (IOError, OSError) as e:
                self._processor.log_io_error(
                    u"Could not copy {0} to {1}".format(
                        self.path, tmp_target),
                    e)
                return

        target = self._new_path_in(maildir)
        try:
            self._processor.rename(tmp_target, target)
        except (IOError, OSError) as e:
            self._processor.log_io_error(
                u"Could not rename {0} to {1}".format(tmp_target, target),
                e)

    def _delete(self):
        try:
            os.unlink(self.path)
        except OSError as e:
            # The file was probably moved.
            self._processor.log_io_error(
                u"Could not delete {0}; some other process probably (re)moved"
                u" it".format(self.path),
                e)

    def _forward(self, delete, addresses, env_sender):
        # Pipe the mail file to the configured sendmail command.
        # "addresses" may be one address string or an iterable of
        # address strings. The mail is deleted afterwards only if
        # "delete" is true and the delivery command succeeded.
        if isinstance(addresses, basestring):
            addresses = [addresses]
        else:
            addresses = list(addresses)
        copy = u"" if delete else u" copy"
        flags = self._processor.sendmail_flags
        if env_sender is not None:
            flags += u" -f {0}".format(env_sender)

        self._processor.log(
            u"==> Forwarding{0} to {1!r}".format(copy, addresses))
        try:
            source_fp = open(self.path, u"rb")
        except IOError as e:
            # The file was probably moved.
            self._processor.log_mail_opening_error(self.path, e)
            return

        # NOTE(review): the command is interpreted by a shell, so the
        # addresses and the envelope sender must not be built from
        # untrusted data (such as mail headers) without quoting.
        command = u"{0} {1} -- {2}".format(
            self._processor.sendmail,
            flags,
            u" ".join(addresses))
        pipe_error = None
        with source_fp:
            p = subprocess.Popen(
                command, shell=True, stdin=subprocess.PIPE)
            try:
                shutil.copyfileobj(source_fp, p.stdin)
            except (IOError, OSError) as e:
                # E.g. a broken pipe because the command exited early.
                pipe_error = e
        # Always close the pipe; otherwise the command would wait for
        # more input forever and p.wait() below would never return.
        try:
            p.stdin.close()
        except (IOError, OSError) as e:
            if pipe_error is None:
                pipe_error = e
        status = p.wait()

        if pipe_error is not None:
            self._processor.log_io_error(
                u"Could not pipe {0} to {1}".format(
                    self.path, self._processor.sendmail),
                pipe_error)
            return
        if status != 0:
            # Never delete a mail that was not handed over successfully.
            self._processor.log_error(
                u"Error: {0} exited with status {1}; keeping {2}".format(
                    self._processor.sendmail, status, self.path))
            return

        if delete:
            self._delete()

    def _get_flagpart(self):
        # Return the ":2,<flags>" suffix of the mail's file name, or an
        # empty string if the name does not contain exactly one ":2,".
        parts = os.path.basename(self.path).split(u":2,")
        if len(parts) == 2:
            return u":2," + parts[1]
        else:
            return u""


class MaildirProcessor(object):
    u"""Scanner for a set of maildirs; the object rc files work with.

    Iterating over a processor yields one mail object (DryRunMail in
    dry-run mode, otherwise Mail) for every file found in the "cur" and
    "new" subdirectories of the configured maildirs.

    Log levels: 0 is used for errors, 1 for informational messages and
    2 for debug messages. A message is written if its level is less
    than or equal to the configured log level.
    """

    def __init__(
            self, rcfile, log_fp, log_level=1, dry_run=False, run_once=False,
            auto_reload_rcfile=False):
        self._rcfile = rcfile
        self._log_fp = log_fp
        self._log_level = log_level
        # A dry run never loops.
        self._run_once = run_once or dry_run
        self._auto_reload_rcfile = auto_reload_rcfile
        self._maildir_base = None
        # Number of maildir names created so far; part of each new name.
        self._deliveries = 0
        self._maildirs = []
        self._sendmail = u"/usr/sbin/sendmail"
        self._sendmail_flags = u"-i"
        # Set by __iter__ when it stops because the rc file changed.
        self.rcfile_modified = False
        self._previous_rcfile_mtime = self._get_previous_rcfile_mtime()
        if dry_run:
            self._mail_class = DryRunMail
        else:
            self._mail_class = Mail

    def get_auto_reload_rcfile(self):
        return self._auto_reload_rcfile
    def set_auto_reload_rcfile(self, value):
        self._auto_reload_rcfile = value
    auto_reload_rcfile = property(
        get_auto_reload_rcfile, set_auto_reload_rcfile)

    def set_logfile(self, path_or_fp):
        u"""Send the log to a path (opened for appending) or a file object."""
        if isinstance(path_or_fp, basestring):
            self._log_fp = open(
                os.path.expanduser(path_or_fp),
                u"a",
                errors=u"backslashreplace")
        else:
            self._log_fp = path_or_fp
    # Write-only property.
    logfile = property(fset=set_logfile)

    def get_maildir_base(self):
        return self._maildir_base
    def set_maildir_base(self, path):
        self._maildir_base = os.path.expanduser(path)
    maildir_base = property(get_maildir_base, set_maildir_base)

    def get_maildirs(self):
        return self._maildirs
    def set_maildirs(self, maildirs):
        self._maildirs = maildirs
    maildirs = property(get_maildirs, set_maildirs)

    @property
    def rcfile(self):
        return self._rcfile

    def get_sendmail(self):
        return self._sendmail
    def set_sendmail(self, sendmail):
        self._sendmail = sendmail
    sendmail = property(get_sendmail, set_sendmail)

    def get_sendmail_flags(self):
        return self._sendmail_flags
    def set_sendmail_flags(self, sendmail_flags):
        self._sendmail_flags = sendmail_flags
    sendmail_flags = property(get_sendmail_flags, set_sendmail_flags)

    def __iter__(self):
        u"""Yield a mail object for every mail file in the maildirs.

        Unless the processor runs once, the maildirs are rescanned every
        second and a subdirectory is listed again whenever its
        modification time differs from the one last recorded. With
        automatic rc file reloading enabled, iteration stops (with
        rcfile_modified set to True) when the rc file's modification
        time has changed.
        """
        if not self._maildirs:
            self.fatal_error(u"Error: No maildirs to process")

        self.rcfile_modified = False
        # Maps subdirectory path -> modification time at the last scan
        # that is known to be complete.
        mtime_map = {}
        while True:
            if self.auto_reload_rcfile:
                current_rcfile_mtime = self._get_previous_rcfile_mtime()
                if current_rcfile_mtime != self._previous_rcfile_mtime:
                    self._previous_rcfile_mtime = current_rcfile_mtime
                    self.rcfile_modified = True
                    self.log_info(u"Detected modified RC file; reloading")
                    break
            for maildir in self._maildirs:
                maildir_path = os.path.join(self._maildir_base, maildir)
                for subdir in [u"cur", u"new"]:
                    subdir_path = os.path.join(maildir_path, subdir)
                    cur_mtime = os.path.getmtime(subdir_path)
                    if cur_mtime != mtime_map.setdefault(subdir_path, 0):
                        if cur_mtime < int(time.time()):
                            # If cur_mtime == int(time.time()) we
                            # can't be sure that everything has been
                            # processed; a new mail may be delivered
                            # later the same second.
                            mtime_map[subdir_path] = cur_mtime
                        for mail_file in os.listdir(subdir_path):
                            mail_path = os.path.join(subdir_path, mail_file)
                            yield self._mail_class(self, maildir, mail_path)
            if self._run_once:
                break
            time.sleep(1)

    def log(self, text, level=1):
        u"""Write text to the log if level does not exceed the log level."""
        if level <= self._log_level:
            safe_write(self._log_fp, text)
            self._log_fp.flush()

    def log_debug(self, text):
        self.log(text, 2)

    def log_error(self, text):
        self.log(text, 0)

    def log_info(self, text):
        self.log(text, 1)

    def fatal_error(self, text):
        u"""Log text as an error, echo it on stderr and exit with status 1."""
        self.log_error(text)
        safe_write(sys.stderr, text)
        sys.exit(1)

    # ----------------------------------------------------------------
    # Interface used by MailBase and descendants:

    def create_maildir_name(self):
        u"""Create and return a unique name for a Maildir message.

        The name has the form "<seconds>.<unique>.<hostname>", where
        <unique> combines the microsecond part of the current time, the
        process ID, a per-processor counter and a random number.
        """
        hostname = socket.gethostname()
        # "/" and ":" have a special meaning in maildir file names.
        hostname = hostname.replace(u"/", u"\\057")
        hostname = hostname.replace(u":", u"\\072")
        now = time.time()
        # Use whole numbers for both time parts: formatting the float
        # itself would put an extra "." (and, on Python 2, a rounded
        # value) into the name.
        seconds = int(now)
        microseconds = int((now - seconds) * 1000000)
        delivery_identifier = u"M{0}P{1}Q{2}R{3:0>8x}".format(
            microseconds,
            os.getpid(),
            self._deliveries,
            random.randint(0, 0xffffffff))
        self._deliveries += 1
        return u"{0}.{1}.{2}".format(seconds, delivery_identifier, hostname)

    def log_io_error(self, errmsg, os_errmsg):
        self.log_error(
            u"Error: {0} (error message from OS: {1})".format(
                errmsg, os_errmsg))

    def log_mail_opening_error(self, path, errmsg):
        self.log_io_error(
            u"Could not open {0}; some other process probably (re)moved"
            u" it".format(path),
            errmsg)

    def rename(self, source, target):
        u"""Rename source to target; a failure is logged, not raised."""
        try:
            os.rename(source, target)
        except OSError as e:
            self.log_error(
                u"Error: Could not rename {0} to {1}: {2}".format(
                    source, target, e))


    # ----------------------------------------------------------------
    # Private methods:

    def _get_previous_rcfile_mtime(self):
        # Return the current modification time of the rc file, or None
        # if the rc file is standard input ("-") or cannot be examined.
        if self.rcfile == u"-":
            return None
        else:
            try:
                return os.path.getmtime(self.rcfile)
            except OSError:
                # File does not exist.
                return None

######################################################################

def main(argv):
    u"""Run maildirproc with the command line argv (including argv[0]).

    Parses the options, creates ~/.maildirproc if needed, sets up the
    log and a MaildirProcessor and then executes the rc file with the
    processor available to it under the name "processor". If the
    processor reports that the rc file was modified, the rc file is
    read and executed again.
    """
    maildirproc_directory = u"~/.maildirproc"
    default_rcfile_location = os.path.join(maildirproc_directory, u"default.rc")
    default_logfile_location = os.path.join(maildirproc_directory, u"log")

    if not os.path.isdir(os.path.expanduser(maildirproc_directory)):
        os.mkdir(os.path.expanduser(maildirproc_directory))

    parser = OptionParser(
        version=u"1.0",
        description=(
            u"maildirproc is a program that scans a number of maildir mail"
            u" boxes and processes found mail as defined by an rc file. See"
            u" http://joel.rosdahl.net/maildirproc/ for more information."))
    parser.add_option(
        u"--auto-reload-rcfile",
        action=u"store_true",
        default=False,
        help=(
            u"turn on automatic reloading of the rc file when it has been"
            u" modified"))
    parser.add_option(
        u"--dry-run",
        action=u"store_true",
        default=False,
        help=(
            u"just log what should have been done; implies --once"))
    parser.add_option(
        u"-l",
        u"--logfile",
        type=u"string",
        dest=u"logfile",
        metavar=u"FILE",
        help=u"send log to FILE instead of the default ({0})".format(
            default_logfile_location),
        default=default_logfile_location)
    parser.add_option(
        u"--log-level",
        type=u"int",
        metavar=u"INTEGER",
        help=(
            u"only include log messages with this log level or lower; defaults"
            u" to 1"),
        default=1)
    parser.add_option(
        u"-m",
        u"--maildir",
        action=u"append",
        type=u"string",
        default=[],
        dest=u"maildirs",
        metavar=u"DIRECTORY",
        help=(
            u"add DIRECTORY to the set of maildir directories to process (can"
            u" be passed multiple times); if DIRECTORY is relative, it is"
            u" relative to the maildir base directory"))
    parser.add_option(
        u"-b",
        u"--maildir-base",
        type=u"string",
        default=u".",
        dest=u"maildir_base",
        metavar=u"DIRECTORY",
        help=u"set maildir base directory; defaults to the current working"
             u" directory")
    parser.add_option(
        u"--once",
        action=u"store_true",
        default=False,
        help=(
            u"only process the maildirs once and then exit; without this flag,"
            u" maildirproc will scan the maildirs continuously"))
    parser.add_option(
        u"-r",
        u"--rcfile",
        type=u"string",
        dest=u"rcfile",
        metavar=u"FILE",
        help=(
            u"use the given rc file instead of the default ({0})".format(
                default_rcfile_location)),
        default=default_rcfile_location)
    parser.add_option(
        u"--test",
        action=u"store_true",
        default=False,
        help=(
            u"test mode; implies --dry-run, --once, --logfile=- and"
            u" --verbose"))
    parser.add_option(
        u"-v",
        u"--verbose",
        action=u"count",
        default=0,
        dest=u"verbosity",
        help=u"increase log level one step")
    (options, _) = parser.parse_args(argv[1:])

    # Expand the options implied by --test and --dry-run.
    if options.test:
        options.dry_run = True
        options.logfile = u"-"
        options.verbosity = max(1, options.verbosity)

    if options.dry_run:
        options.once = True

    # "-" sends the log to standard output.
    if options.logfile == u"-":
        log_fp = sys.stdout
    else:
        log_fp = open(
            os.path.expanduser(options.logfile),
            u"a",
            encoding=locale.getpreferredencoding(),
            errors=u"backslashreplace")

    # Each -v raises the log level one step above --log-level.
    log_level = options.log_level + options.verbosity

    rcfile = os.path.expanduser(options.rcfile)
    processor = MaildirProcessor(
        rcfile, log_fp, log_level, options.dry_run, options.once,
        options.auto_reload_rcfile)
    processor.log(u"")
    processor.log(
        u"Starting maildirproc {0} at {1}".format(
            parser.version, iso_8601_now()))

    processor.maildir_base = options.maildir_base
    if options.maildirs:
        processor.maildirs = options.maildirs
    if u"SENDMAIL" in os.environ:
        processor.sendmail = os.environ[u"SENDMAIL"]
    if u"SENDMAILFLAGS" in os.environ:
        processor.sendmail_flags = os.environ[u"SENDMAILFLAGS"]
    # Global namespace in which the rc file is executed.
    environment = {u"processor": processor}

    # The rc file is Python code and is executed as such, so it must
    # come from a trusted source.
    if rcfile == u"-":
        processor.log(u"RC file: <standard input>")
        rc = sys.stdin.read()
        exec(rc, environment)
    else:
        processor.log(u"RC file: {0}".format(ascii(rcfile)))
        while True:
            try:
                # Close the rc file again right after reading it.
                with open(rcfile) as rc_fp:
                    rc = rc_fp.read()
            except IOError as e:
                processor.fatal_error(
                    u"Error: Could not open RC file: {0}".format(e))
            else:
                exec(rc, environment)
                if not processor.rcfile_modified:
                    # Normal exit.
                    break
                # We should reload the RC file.


# Program entry point: run main() with the command line arguments. An
# interrupt from the keyboard (Ctrl-C) ends the program quietly instead
# of with a traceback.
try:
    main(sys.argv)
except KeyboardInterrupt:
    pass
