diff options
author | Jason Gunthorpe <jgg@mellanox.com> | 2020-02-24 11:28:37 -0400 |
---|---|---|
committer | Jason Gunthorpe <jgg@mellanox.com> | 2020-02-24 11:28:37 -0400 |
commit | 87f34026e85dfd73b68568bc46db555c173be1f4 (patch) | |
tree | a89d19d2f6b7721461bf1ca13c8ba6a1b36d3f0f /cloud_mdir_sync/gmail.py | |
parent | 0643cc2f06f175451439a4bed71478ff6825494a (diff) | |
download | cloud_mdir_sync-87f34026e85dfd73b68568bc46db555c173be1f4.tar.gz cloud_mdir_sync-87f34026e85dfd73b68568bc46db555c173be1f4.tar.bz2 cloud_mdir_sync-87f34026e85dfd73b68568bc46db555c173be1f4.zip |
gmail: Fix determining the start_history_id
Occasionally gmail returns history records that have junk in them, for
instance message IDs that don't exist. This appears to be a bug.
Using the latest history ID seems to guarantee that a subsequent history
query will be empty and we can then avoid the junk and start again.
Fall back to a full query if the delta query fails for any reason.
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Diffstat (limited to 'cloud_mdir_sync/gmail.py')
-rw-r--r-- | cloud_mdir_sync/gmail.py | 20 |
1 files changed, 13 insertions, 7 deletions
diff --git a/cloud_mdir_sync/gmail.py b/cloud_mdir_sync/gmail.py index b1d2b55..8b03e66 100644 --- a/cloud_mdir_sync/gmail.py +++ b/cloud_mdir_sync/gmail.py @@ -409,7 +409,9 @@ class GMailMailbox(mailbox.Mailbox): async def _fetch_all_messages(self): """Perform a full synchronization of the mailbox""" - start_history_id = None + profile = await self.gmail.get_json("v1","/users/me/profile") + start_history_id = profile["historyId"] + todo = [] msgs = [] async for jmsg in self.gmail.get_json_paged( @@ -424,8 +426,6 @@ class GMailMailbox(mailbox.Mailbox): else: todo.append(asyncio.create_task(self._fetch_metadata(msg))) msgs.append(msg) - if todo: - start_history_id = await todo[0] await asyncio_complete(*todo) return (msgs, start_history_id) @@ -516,15 +516,21 @@ class GMailMailbox(mailbox.Mailbox): async def update_message_list(self): """Retrieve the list of all messages and store all the message content in the content_hash message database""" - if self.history_delta is None or self.history_delta[1] is None: + if self.history_delta is None: # For whatever reason, there is usually more history than is # suggested by the history_id from the messages.list, so always # drain it out. self.history_delta = await self._fetch_all_messages() - self.history_delta = await self._fetch_delta_messages( - start_history_id=self.history_delta[1], - old_msgs=self.history_delta[0]) + try: + self.history_delta = await self._fetch_delta_messages( + start_history_id=self.history_delta[1], + old_msgs=self.history_delta[0]) + except: + # If we fail to read a delta then the history is lost/garbage, + # start again from full sync. + self.history_delta = None; + raise self.messages = { msg.content_hash: msg |