aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Jason Gunthorpe <jgg@mellanox.com> 2020-02-24 11:28:37 -0400
committer Jason Gunthorpe <jgg@mellanox.com> 2020-02-24 11:28:37 -0400
commit 87f34026e85dfd73b68568bc46db555c173be1f4 (patch)
tree a89d19d2f6b7721461bf1ca13c8ba6a1b36d3f0f
parent 0643cc2f06f175451439a4bed71478ff6825494a (diff)
download cloud_mdir_sync-87f34026e85dfd73b68568bc46db555c173be1f4.tar.gz
cloud_mdir_sync-87f34026e85dfd73b68568bc46db555c173be1f4.tar.bz2
cloud_mdir_sync-87f34026e85dfd73b68568bc46db555c173be1f4.zip
gmail: Fix determining the start_history_id
Occasionally gmail returns history records that have junk in them, for instance message IDs that don't exist. This appears to be a bug. Using the latest history ID seems to guarantee that a subsequent history query will be empty and we can then avoid the junk and start again. Go back to full query if delta fails for any reason. Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
-rw-r--r-- cloud_mdir_sync/gmail.py 20
1 files changed, 13 insertions, 7 deletions
diff --git a/cloud_mdir_sync/gmail.py b/cloud_mdir_sync/gmail.py
index b1d2b55..8b03e66 100644
--- a/cloud_mdir_sync/gmail.py
+++ b/cloud_mdir_sync/gmail.py
@@ -409,7 +409,9 @@ class GMailMailbox(mailbox.Mailbox):
async def _fetch_all_messages(self):
"""Perform a full synchronization of the mailbox"""
- start_history_id = None
+ profile = await self.gmail.get_json("v1","/users/me/profile")
+ start_history_id = profile["historyId"]
+
todo = []
msgs = []
async for jmsg in self.gmail.get_json_paged(
@@ -424,8 +426,6 @@ class GMailMailbox(mailbox.Mailbox):
else:
todo.append(asyncio.create_task(self._fetch_metadata(msg)))
msgs.append(msg)
- if todo:
- start_history_id = await todo[0]
await asyncio_complete(*todo)
return (msgs, start_history_id)
@@ -516,15 +516,21 @@ class GMailMailbox(mailbox.Mailbox):
async def update_message_list(self):
"""Retrieve the list of all messages and store all the message content
in the content_hash message database"""
- if self.history_delta is None or self.history_delta[1] is None:
+ if self.history_delta is None:
# For whatever reason, there is usually more history than is
# suggested by the history_id from the messages.list, so always
# drain it out.
self.history_delta = await self._fetch_all_messages()
- self.history_delta = await self._fetch_delta_messages(
- start_history_id=self.history_delta[1],
- old_msgs=self.history_delta[0])
+ try:
+ self.history_delta = await self._fetch_delta_messages(
+ start_history_id=self.history_delta[1],
+ old_msgs=self.history_delta[0])
+ except:
+ # If we fail to read a delta then the history is lost/garbage,
+ # start again from full sync.
+ self.history_delta = None;
+ raise
self.messages = {
msg.content_hash: msg