Skip to content

Instantly share code, notes, and snippets.

@leoblum
Last active January 22, 2021 11:42
Show Gist options
  • Save leoblum/2de06c42544db73ff9ba036989190155 to your computer and use it in GitHub Desktop.
Save leoblum/2de06c42544db73ff9ba036989190155 to your computer and use it in GitHub Desktop.
Make a backup of emails from Gmail without 3rd-party programs, APIs, etc.
import requests
import json
import os
from datetime import timedelta
from time import time
# Folder that receives the downloaded .eml files and the id cache.
output_folder = '.data/gmail_u0'
cache_file = os.path.join(output_folder, '.cache')
os.makedirs(output_folder, exist_ok=True)

# "Copy as cURL" from Chrome Developer Tools > Filter, type: bv?
chrome_req = r'''
'''


def _as_url_template(url):
    """Return *url* as a %-format template whose ``c`` query parameter is ``%s``.

    Every literal ``%`` (e.g. from ``%20`` escapes) is doubled so that a later
    ``template % value`` only substitutes the ``c`` parameter. Only the first
    parameter named exactly ``c`` is replaced, so the template always takes a
    single argument.

    Raises:
        ValueError: if the URL has no ``c`` query parameter.
    """
    base, sep, query = url.partition('?')
    params = []
    replaced = False
    for param in query.split('&'):
        if not replaced and param.startswith('c='):
            params.append('c=%s')
            replaced = True
        else:
            params.append(param.replace('%', '%%'))
    if not replaced:
        raise ValueError("no 'c=' query parameter found in request URL: %s" % url)
    return base.replace('%', '%%') + sep + '&'.join(params)


def parse_curl_command(command):
    """Extract URL template, headers and body from a "Copy as cURL" command.

    The command is read line by line: a line starting with ``curl`` provides
    the URL, ``-H`` lines provide headers and a ``--data-binary`` or
    ``--data-raw`` line provides the request body. Other lines are ignored.

    Args:
        command: the multi-line text pasted from Chrome Developer Tools.

    Returns:
        A ``(url_template, headers, body)`` tuple. ``url_template`` contains a
        single ``%s`` in place of the ``c`` query parameter value. For an
        empty command the result is ``('', {}, '')``.

    Raises:
        ValueError: if a URL is present but has no ``c`` query parameter.
    """
    url = ''
    parsed_headers = {}
    body = ''
    for line in command.split('\n'):
        line = line.strip()
        if not line:
            continue
        # Chrome single-quotes every argument; keeping the text between the
        # first and the last quote also drops the trailing " \" continuation.
        quoted = "'".join(line.split("'")[1:-1])
        if line.startswith('-H'):
            # Split on the first colon only: header values may contain colons.
            name, _, value = quoted.partition(':')
            parsed_headers[name.strip()] = value.strip()
        elif line.startswith(('--data-binary', '--data-raw')):
            body = quoted
        elif line.startswith('curl'):
            url = _as_url_template(quoted)
    return url, parsed_headers, body


req_url, headers, payload = parse_curl_command(chrome_req)
def get_emails_ids(use_cache=False):
    """Collect message ids by replaying the captured request page by page.

    Args:
        use_cache: when true and ``cache_file`` exists, return the ids stored
            in that JSON file instead of querying the server.

    Returns:
        A list of ids. When read from the cache it is whatever the JSON file
        contains; otherwise it is the de-duplicated set of ids gathered from
        the responses, in no particular order.

    The request body is the module-level ``payload`` with ``['1']['10']`` set
    to the current iteration number, and the same number is substituted into
    ``req_url``. NOTE(review): both presumably act as a page/request counter
    for the Gmail endpoint -- confirm against a captured request.
    """
    if use_cache and os.path.exists(cache_file):
        with open(cache_file) as fp:
            return json.load(fp)

    emails = set()
    for x in range(999999):
        data = json.loads(payload)
        data['1']['10'] = x
        # Serialize the modified body; dumping ``payload`` here would discard
        # the index set above and double-encode the original string.
        data = json.dumps(data)
        rep = requests.post(req_url % x, headers=headers, data=data)
        res = rep.json()

        emails_size = len(emails)
        try:
            for top_email in res['3']:
                emails.add(top_email['1']['20'])
                for thread_email in top_email['1']['5']:
                    emails.add(thread_email['56'])
        except KeyError:
            # Best effort: a response lacking the expected keys adds nothing
            # more; ids collected before the missing key are kept.
            pass

        size_diff = len(emails) - emails_size
        print('% 3d. Total: %d. New: %d' % (x, len(emails), size_diff))
        # A page that yields no unseen id is treated as the end of the list.
        if size_diff == 0:
            break

    emails = list(emails)
    return emails
# Get the list of message ids (from the cache file when it exists) and store
# it so that later runs can skip the listing requests.
emails = get_emails_ids(use_cache=True)
with open(cache_file, 'w') as fp:
    json.dump(emails, fp)

# Ids already saved as <id>.eml in the output folder; files whose name is not
# a known id are ignored. Sets keep the membership tests below O(1) instead
# of scanning a list for every message.
known_ids = set(emails)
downloaded = [x.split('.eml')[0] for x in os.listdir(output_folder) if x.endswith('.eml')]
downloaded = [x for x in downloaded if x in known_ids]
downloaded_ids = set(downloaded)

start_time = time()
last_time = start_time
counter = len(downloaded)

for x in emails:
    if x in downloaded_ids:
        continue

    url = 'https://mail.google.com/mail/u/0?view=att&th=%s&attid=0&disp=comp&safe=1&zw' % x
    rep = requests.get(url, headers=headers)
    if rep.status_code != 200:
        # Report and move on; the message is retried on the next run because
        # no .eml file was written for it.
        print('error on %s with code %s' % (x, rep.status_code))
        continue

    with open(os.path.join(output_folder, '%s.eml' % x), 'wb') as fp:
        fp.write(rep.content)
    counter += 1

    # Print a progress line at most once every 20 seconds.
    current_time = time()
    if current_time - last_time > 20:
        # Speed counts only messages fetched during this run.
        avg_speed = (counter - len(downloaded)) / (current_time - start_time)
        time_left = (len(emails) - counter) / avg_speed
        time_left = timedelta(seconds=int(time_left))
        last_time = current_time
        print('%d of %d emails downloaded. time left: %s' % (counter, len(emails), time_left))
@leoblum
Copy link
Author

leoblum commented Jan 22, 2021

Then you can upload the emails to Thunderbird by following this guide:
https://support.mozilla.org/en-US/questions/1215791#answer-1106597

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment