From f44a42f276e01514a1e4bf3028639aaea58138c6 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 5 Feb 2020 13:50:31 -0400 Subject: Allow access to the message headers during mail directing Access to the headers like List-ID allows some client side filtering of incoming messages. Signed-off-by: Jason Gunthorpe --- README.md | 42 +++++++++++++++++++++++++ cloud_mdir_sync/gmail.py | 2 +- cloud_mdir_sync/messages.py | 76 ++++++++++++++++++++++++++++----------------- 3 files changed, 90 insertions(+), 30 deletions(-) diff --git a/README.md b/README.md index 5e1521f..6c20d0e 100644 --- a/README.md +++ b/README.md @@ -93,6 +93,48 @@ virtual environment. The included 'cloud-mdir-sync' script will automatically create the required virtual environment with the needed packages downloaded with pip and then run the program from within it. +## Client side filtering + +By default CMS supports only a single local MailDir. All cloud mailbox are +synchronized into it. + +With client side filtering multiple local MailDirs can be used and messages +can be routed into them. + +Here is an example of how to download from two cloud accounts into two +MailDirs: + +```python +gmail_md = MailDir("~/mail/gmail") +gmail_cloud = GMail("INBOX", GMail_Account(user="user@domain.com")) + +o365_md = MailDir("~/mail/o365") +o365_cloud = Office365("inbox", Office365_Account(user="user@domain.com")) + +def direct_message(msg): + if msg.mailbox is gmail_cloud: + return gmail_md + else: + return o365_md +cfg.direct_message = direct_message +``` + +Filtering can also inspect message headers, this example will filter messages by +List-ID: + +```python +def direct_message(msg): + list_id = msg.get_header("List-ID") + if list_id == "" + return foo_md + if list_id == "" + return bar_md + return default_md +``` + +All messages must be routed somewhere, they cannot be deleted through +directing. + # OAUTH2 Authentication Most cloud providers are now using OAUTH2, and often also provide options to diff --git a/cloud_mdir_sync/gmail.py b/cloud_mdir_sync/gmail.py index 6bd71a1..49468d4 100644 --- a/cloud_mdir_sync/gmail.py +++ b/cloud_mdir_sync/gmail.py @@ -304,7 +304,7 @@ class GMailMessage(messages.Message): self.content_hash = mailbox.msgdb.content_hashes_cloud.get( self.cid()) if self.content_hash: - self.email_id = mailbox.msgdb.content_msgid[self.content_hash] + self.fill_email_id() self.gmail_labels = gmail_labels if self.gmail_labels: self._labels_to_flags() diff --git a/cloud_mdir_sync/messages.py b/cloud_mdir_sync/messages.py index 83614b1..9490d6e 100644 --- a/cloud_mdir_sync/messages.py +++ b/cloud_mdir_sync/messages.py @@ -25,9 +25,8 @@ if TYPE_CHECKING: ContentHash_Type = str CID_Type = tuple -MBoxDict_Type = Dict["mailbox.Mailbox", Dict[ContentHash_Type, - "Message"]] CHMsgDict_Type = Dict[ContentHash_Type, "Message"] +MBoxDict_Type = Dict["mailbox.Mailbox", CHMsgDict_Type] CHMsgMappingDict_Type = Dict[ContentHash_Type, Tuple[Optional["Message"], Optional["Message"]]] @@ -42,7 +41,7 @@ class Message(object): FLAG_FLAGGED = 1 << 2 FLAG_DELETED = 1 << 3 ALL_FLAGS = FLAG_REPLIED | FLAG_READ | FLAG_FLAGGED | FLAG_DELETED - fn: str + fn: Optional[str] = None size: Optional[int] def __init__(self, mailbox, storage_id, email_id=None): @@ -65,6 +64,44 @@ class Message(object): "email_id": self.email_id } + def _read_header(self, hdr): + msgdb = self.mailbox.msgdb + if self.fn: + fn = self.fn + else: + assert self.content_hash is not None + fn = os.path.join(msgdb.hashes_dir, self.content_hash) + with open(fn, "rb") as F: + emsg = email.parser.BytesParser().parsebytes(F.read()) + # Hrm, I wonder if this is the right way to normalize a header? + val = emsg.get(hdr) + if val is None: + return None + return re.sub(r"\n[ \t]+", " ", val).strip() + + def fill_email_id(self): + """Try to fill in the email_id from our caches or by reading the + message itself""" + if self.email_id is not None: + # Check or cache the email_id provided by the Mailbox + content_msg_header = self.mailbox.msgdb.content_msg_header + oval = content_msg_header.get((self.content_hash, "message-id")) + if oval is None: + content_msg_header[(self.content_hash, "message-id")] = oval + else: + assert oval == self.email_id + return + self.email_id = self.get_header("message-id") + + def get_header(self, hdr): + """Return a email header from a message""" + hdr = hdr.lower() + content_msg_header = self.mailbox.msgdb.content_msg_header + val = content_msg_header.get((self.content_hash, hdr), False) + if val is not False: + return val + val = self._read_header(hdr) + content_msg_header[(self.content_hash, hdr)] = val class MessageDB(object): """The persistent state associated with the message database. This holds: @@ -73,7 +110,7 @@ class MessageDB(object): """ content_hashes: Dict[CID_Type, ContentHash_Type] content_hashes_cloud: Dict[CID_Type, ContentHash_Type] - content_msgid: Dict[ContentHash_Type, str] + content_msg_header: Dict[Tuple[ContentHash_Type, str], str] alt_file_hashes: Dict[ContentHash_Type, set] inode_hashes: Dict[tuple, ContentHash_Type] file_hashes: Set[str] @@ -88,7 +125,7 @@ class MessageDB(object): def __init__(self, cfg: config.Config): self.cfg = cfg self.content_hashes = {} # [cid] = content_hash - self.content_msgid = {} # [hash] = message_id + self.content_msg_header = {} # [hash,msg_header] = value self.file_hashes = set() self.alt_file_hashes = collections.defaultdict( set) # [hash] = set(fns) @@ -166,7 +203,7 @@ class MessageDB(object): for k in blacklist: del res[k] for cid, ch in res.items(): - self.content_msgid[ch] = cid[2] + self.content_msg_header[ch,"message-id"] = cid[2] self.content_hashes = res # Build a mapping with only the mailbox ID, no message_id @@ -223,24 +260,6 @@ class MessageDB(object): return (msg.content_hash is not None and msg.content_hash in self.file_hashes) - def _fill_email_id(self, msg, fn): - """Try to fill in the email_id from our caches or by reading the - message itself""" - if msg.email_id is not None: - assert self.content_msgid.get(msg.content_hash, - msg.email_id) == msg.email_id - return - - msg.email_id = self.content_msgid.get(msg.content_hash) - if msg.email_id is not None: - return - - with open(fn, "rb") as F: - emsg = email.parser.BytesParser().parsebytes(F.read()) - # Hrm, I wonder if this is the right way to normalize a header? - msg.email_id = re.sub(r"\n[ \t]+", " ", - emsg["message-id"]).strip() - def msg_from_file(self, msg, fn): """Setup msg from a local file, ie in a Maildir. This also records that we have this message in the DB""" @@ -251,10 +270,9 @@ class MessageDB(object): msg.content_hash = self._sha1_fn(fn) self.inode_hashes[inode] = msg.content_hash - self._fill_email_id(msg, fn) - self.content_msgid[msg.content_hash] = msg.email_id - self.alt_file_hashes[msg.content_hash].add(fn) msg.fn = fn + self.alt_file_hashes[msg.content_hash].add(fn) + msg.fill_email_id() def write_content(self, content_hash, dest_fn): """Make the filename dest_fn contain content_hash's content""" @@ -279,8 +297,8 @@ class MessageDB(object): self.inode_hashes[inode] = ch msg.content_hash = ch - self._fill_email_id(msg, fn) - self.content_msgid[ch] = msg.email_id + assert msg.fn is None + msg.fill_email_id() cid = msg.cid() self.content_hashes[msg.cid()] = ch -- cgit v1.2.3