Skip to content

Instantly share code, notes, and snippets.

@brett-fitz
Created January 26, 2024 15:51
Show Gist options
  • Save brett-fitz/ef99f1df3dab6df936f844477356251f to your computer and use it in GitHub Desktop.
Save brett-fitz/ef99f1df3dab6df936f844477356251f to your computer and use it in GitHub Desktop.
Google Drive download all my items and those that are shared with me
import os
import re
import io
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
# OAuth scope requested for the Google Drive API. The read-only scope is used
# because this script only lists files and downloads/exports their content.
SCOPES = ['https://www.googleapis.com/auth/drive.readonly']
def sanitize_filename(name):
    """Return *name* made safe to use as a single local path component.

    Path separators, the characters ``* ? : " < > |`` and ASCII control
    characters (including NUL) are each replaced with an underscore.

    Names that are empty or consist only of ``.`` / ``..`` are rewritten as
    underscores as well: joined onto a directory they would otherwise refer
    to that directory or its parent, letting a remote file name escape the
    download location.

    Args:
        name (str): The original filename.

    Returns:
        str: The sanitized filename; never empty, ``.`` or ``..``.
    """
    cleaned = re.sub(r'[\\/*?:"<>|\x00-\x1f]', '_', name)
    if cleaned in ('', '.', '..'):
        # '.' -> '_', '..' -> '__', '' -> '_'
        return cleaned.replace('.', '_') or '_'
    return cleaned
# Google Workspace MIME types that can be exported, mapped to the
# (export MIME type, local file extension) used for the downloaded copy.
_EXPORT_FORMATS = {
    'application/vnd.google-apps.document': (
        'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        '.docx',
    ),
    'application/vnd.google-apps.spreadsheet': (
        'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
        '.xlsx',
    ),
    'application/vnd.google-apps.presentation': (
        'application/vnd.openxmlformats-officedocument.presentationml.presentation',
        '.pptx',
    ),
}
_GOOGLE_APPS_PREFIX = 'application/vnd.google-apps.'


def download_file(service, file_id, file_path, mime_type):
    """Downloads a file from Google Drive.

    Google Docs, Sheets, and Slides are exported to their corresponding
    Microsoft Office formats and saved with the matching extension appended
    to ``file_path``. Other Google Workspace types have no export format
    configured here and are skipped. Every other file is downloaded as-is.

    The content is first written to ``<target>.part`` and renamed to its
    final name only after the download completes, so a failed or interrupted
    download is never mistaken for a finished one on the next run.

    Errors are reported on stdout and do not propagate, so one failing file
    does not abort the rest of a backup.

    Args:
        service: The Google Drive API service instance.
        file_id (str): The ID of the file to download.
        file_path (str): The local path to save the file to (without the
            export extension for Google Workspace documents).
        mime_type (str): The MIME type of the file.
    """
    mime_type = mime_type or ''
    export = _EXPORT_FORMATS.get(mime_type)
    if export is None and mime_type.startswith(_GOOGLE_APPS_PREFIX):
        # Forms, shortcuts, drawings, ...: neither exportable with the table
        # above nor downloadable via get_media.
        print(f"Skipping {file_path}: no export format for {mime_type}.")
        return

    if export is not None:
        export_mime, extension = export
        # Resolve the final name *before* the existence check so exported
        # documents are skipped on re-runs too.
        file_path += extension

    if os.path.exists(file_path):
        print(f"File {file_path} already exists, skipping.")
        return

    partial_path = file_path + '.part'
    try:
        print(f"Downloading {file_path}...")
        if export is not None:
            request = service.files().export_media(fileId=file_id, mimeType=export_mime)
        else:
            request = service.files().get_media(fileId=file_id)

        parent = os.path.dirname(file_path)
        if parent:
            os.makedirs(parent, exist_ok=True)

        # The context manager guarantees the handle is closed even on error.
        with io.FileIO(partial_path, 'wb') as fh:
            downloader = MediaIoBaseDownload(fh, request)
            done = False
            while not done:
                status, done = downloader.next_chunk()
                print(f"Download {int(status.progress() * 100)}%.")
        os.replace(partial_path, file_path)
    except Exception as e:
        # Deliberately broad: this is the per-file best-effort boundary.
        print(f"An error occurred: {e}")
        try:
            os.remove(partial_path)
        except OSError:
            pass  # Nothing was written, or it is already gone.
def download_folder(service, folder_id, folder_path):
    """Recursively downloads the contents of a folder from Google Drive.

    Creates ``folder_path`` if needed, then walks every result page of the
    folder listing. Subfolders are downloaded recursively into a
    subdirectory of the same (sanitized) name; all other entries are handed
    to ``download_file``.

    Args:
        service: The Google Drive API service instance.
        folder_id (str): The ID of the folder to download.
        folder_path (str): The local path to save the contents of the folder to.
    """
    os.makedirs(folder_path, exist_ok=True)

    page_token = None
    while True:
        response = service.files().list(
            q=f"'{folder_id}' in parents",
            spaces='drive',
            fields='nextPageToken, files(id, name, mimeType)',
            pageToken=page_token,
        ).execute()

        for item in response.get('files', []):
            item_id = item.get('id')
            item_mime = item.get('mimeType')
            item_path = os.path.join(folder_path, sanitize_filename(item.get('name')))
            if item_mime == 'application/vnd.google-apps.folder':
                download_folder(service, item_id, item_path)
            else:
                download_file(service, item_id, item_path, item_mime)

        # The listing is paginated; keep going until no token is returned,
        # otherwise large folders are silently truncated to the first page.
        page_token = response.get('nextPageToken')
        if page_token is None:
            break
def main():
    """Main function to authenticate and initiate the file download process.

    Lists every Drive item owned by the authenticated user, page by page,
    and mirrors it under ``backup/``: folders go through ``download_folder``
    and everything else through ``download_file``.
    """
    creds = _load_credentials()
    service = build('drive', 'v3', credentials=creds)

    # Top-level files are written straight into backup/, so it must exist
    # before the first download (folders would otherwise create it only as a
    # side effect, and files listed before any folder would fail).
    os.makedirs('backup', exist_ok=True)

    # NOTE(review): this query also matches items nested inside owned
    # folders, so they appear to be fetched twice -- once flat under backup/
    # and once through the folder recursion. Confirm whether the listing
    # should be limited to top-level items.
    page_token = None
    while True:
        response = service.files().list(
            q="'me' in owners",
            spaces='drive',
            fields='nextPageToken, files(id, name, mimeType)',
            pageToken=page_token).execute()
        for file in response.get('files', []):
            filename = os.path.join('backup/', sanitize_filename(file.get('name')))
            if file.get('mimeType') == 'application/vnd.google-apps.folder':
                download_folder(service, file.get('id'), filename)
            else:
                download_file(service, file.get('id'), filename, file.get('mimeType'))
        page_token = response.get('nextPageToken', None)
        if page_token is None:
            break


def _load_credentials():
    """Return valid Drive credentials, refreshing or re-authorizing as needed.

    Reuses ``token.json`` when it holds valid credentials. Otherwise the
    token is refreshed if possible, or the local-server OAuth flow is run
    from ``credentials.json``; the resulting credentials are written back to
    ``token.json``.
    """
    creds = None
    if os.path.exists('token.json'):
        creds = Credentials.from_authorized_user_file('token.json', SCOPES)
    if creds and creds.valid:
        return creds

    if creds and creds.expired and creds.refresh_token:
        creds.refresh(Request())
    else:
        flow = InstalledAppFlow.from_client_secrets_file('credentials.json', SCOPES)
        creds = flow.run_local_server(port=0)
    with open('token.json', 'w') as token:
        token.write(creds.to_json())
    return creds
# Start the backup only when this file is executed as a script, so that
# importing it (e.g. to reuse the helpers) has no side effects.
if __name__ == '__main__':
    main()
@brett-fitz
Copy link
Author

brett-fitz commented Jan 26, 2024

Dependencies:

python = "^3.11"
google-api-python-client = "^2.115.0"
google-auth-httplib2 = "^0.2.0"
google-auth-oauthlib = "^1.2.0"

Notes:

  • All items are downloaded to backup/*
  • Folders are recursively downloaded, keeping their directory structure
  • Files already downloaded will be skipped (interrupt protection)
  • Google Docs, Sheets, and Slides are exported to Office Open XML formats (.docx, .xlsx, .pptx)
  • Special characters in names that might be problematic on macOS are replaced with underscores

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment