From 87f34026e85dfd73b68568bc46db555c173be1f4 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 24 Feb 2020 11:28:37 -0400 Subject: gmail: Fix determining the start_history_id Occasionally gmail returns history records that have junk in them, for instance message IDs that don't exist. This appears to be a bug. Using the latest history ID seems to guarantee that a subsequent history query will be empty and we can then avoid the junk and start again. Go back to full query if delta fails for any reason. Signed-off-by: Jason Gunthorpe --- cloud_mdir_sync/gmail.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/cloud_mdir_sync/gmail.py b/cloud_mdir_sync/gmail.py index b1d2b55..8b03e66 100644 --- a/cloud_mdir_sync/gmail.py +++ b/cloud_mdir_sync/gmail.py @@ -409,7 +409,9 @@ class GMailMailbox(mailbox.Mailbox): async def _fetch_all_messages(self): """Perform a full synchronization of the mailbox""" - start_history_id = None + profile = await self.gmail.get_json("v1","/users/me/profile") + start_history_id = profile["historyId"] + todo = [] msgs = [] async for jmsg in self.gmail.get_json_paged( @@ -424,8 +426,6 @@ class GMailMailbox(mailbox.Mailbox): else: todo.append(asyncio.create_task(self._fetch_metadata(msg))) msgs.append(msg) - if todo: - start_history_id = await todo[0] await asyncio_complete(*todo) return (msgs, start_history_id) @@ -516,15 +516,21 @@ class GMailMailbox(mailbox.Mailbox): async def update_message_list(self): """Retrieve the list of all messages and store all the message content in the content_hash message database""" - if self.history_delta is None or self.history_delta[1] is None: + if self.history_delta is None: # For whatever reason, there is usually more history than is # suggested by the history_id from the messages.list, so always # drain it out. 
self.history_delta = await self._fetch_all_messages() - self.history_delta = await self._fetch_delta_messages( - start_history_id=self.history_delta[1], - old_msgs=self.history_delta[0]) + try: + self.history_delta = await self._fetch_delta_messages( + start_history_id=self.history_delta[1], + old_msgs=self.history_delta[0]) + except: + # If we fail to read a delta then the history is lost/garbage, + # start again from full sync. + self.history_delta = None; + raise self.messages = { msg.content_hash: msg -- cgit v1.2.3