Last active
September 17, 2024 12:38
-
-
Save nicolevanderhoeven/b25c97a8a68ea97e7bdf5ee674fdaec4 to your computer and use it in GitHub Desktop.
Download Roam images locally for use with Obsidian vault
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Walks every file in the vault directory, downloads each Firebase-hosted file it links to, and replaces the link with a path to the local copy.
# To use, replace PATH in the variable vaultDir with your vault's root directory.
# This automatically puts filenames in /assets - change the newFilePath variable if you want to change this
import calendar
import glob
import os
import re
import sys
import time

import requests
# Root directory of the vault to process; replace PATH before running.
vaultDir = '/PATH'

# A Firebase storage URL: everything from the scheme up to (but excluding) the
# first whitespace or closing delimiter. Stopping at the delimiter instead of
# matching greedily to the end of the line keeps the closing ")" or "}}" of the
# surrounding markup intact and lets several links share one line.
FIREBASE_URL = re.compile(
    r'https://firebasestorage[^\s?)}\]"<>]*\?alt[^\s)}\]"<>]*'
)


def find_firebase_urls(text):
    """Return every Firebase storage URL found in *text*, in order.

    Each item is the full URL including its ``?alt=...`` query string, without
    the markup that surrounds it (``![](...)``, ``{{pdf: ...}}`` and so on).
    """
    return FIREBASE_URL.findall(text)


def file_extension(firebaseShort):
    """Return the extension (with leading dot) of a Firebase URL path.

    *firebaseShort* is the URL without its query string. Only the last path
    component is inspected, where components may be separated by ``/`` or by
    the percent-encoded ``%2F``. Returns ``''`` when that component has no
    extension.
    """
    name = re.split(r'%2F|/', firebaseShort, flags=re.IGNORECASE)[-1]
    return os.path.splitext(name)[1]


def replace_links(text, replacements):
    """Return *text* with each known Firebase URL swapped for its local path.

    *replacements* maps a full Firebase URL to the path that should replace it.
    URLs that are not keys of *replacements* are left untouched. Matching whole
    URLs (rather than using ``str.replace``) avoids corrupting a longer URL
    that merely starts with a shorter one.
    """
    def swap(match):
        url = match.group(0)
        return replacements.get(url, url)

    return FIREBASE_URL.sub(swap, text)


def download_asset(firebaseUrl, index):
    """Download *firebaseUrl* into the vault's assets folder.

    The file is saved as ``assets/<unix timestamp>_<index><ext>`` under
    ``vaultDir``. Returns that vault-relative path, or ``None`` when the URL
    has no file extension or the download fails; in both cases nothing is
    written, so the caller can leave the original link in place.
    """
    firebaseShort = firebaseUrl.split('?', 1)[0]
    ext = file_extension(firebaseShort)
    if not ext:
        sys.stderr.write('Skipping ' + firebaseUrl + ': no file extension\n')
        return None

    try:
        r = requests.get(firebaseUrl, timeout=60)
        # Without this check an HTTP error page would be saved as the asset
        # and the working link would be replaced by a broken file.
        r.raise_for_status()
    except requests.RequestException as err:
        sys.stderr.write('Skipping ' + firebaseUrl + ': ' + str(err) + '\n')
        return None

    timestamp = calendar.timegm(time.gmtime())
    newFilePath = 'assets/' + str(timestamp) + '_' + str(index) + ext
    os.makedirs(os.path.join(vaultDir, 'assets'), exist_ok=True)
    with open(os.path.join(vaultDir, newFilePath), 'wb') as output_file:
        output_file.write(r.content)
    return newFilePath


def localize_file(fileFullPath, downloaded):
    """Download the Firebase files linked from one file and relink them.

    *downloaded* maps already-downloaded Firebase URLs to their local paths; it
    is updated in place so a URL referenced from several files is fetched only
    once. The file is rewritten only if at least one link was replaced.
    """
    # utf-8 + surrogateescape + newline='' round-trips every byte of the file
    # unchanged, so only the links themselves differ after the rewrite.
    textMode = {'encoding': 'utf-8', 'errors': 'surrogateescape', 'newline': ''}

    # First pass: stream the file line by line and collect the URLs.
    urls = []
    with open(fileFullPath, **textMode) as fhand:
        for line in fhand:
            if 'firebasestorage' in line:
                urls.extend(find_firebase_urls(line))
    if not urls:
        return

    for firebaseUrl in urls:
        if firebaseUrl in downloaded:
            continue
        # len(downloaded) grows by one per saved file, giving each download a
        # distinct counter even when several land in the same second.
        newFilePath = download_asset(firebaseUrl, len(downloaded))
        if newFilePath is not None:
            downloaded[firebaseUrl] = newFilePath

    # Second pass: rewrite the file once with every link replaced.
    with open(fileFullPath, **textMode) as fullRead:
        data = fullRead.read()
    updated = replace_links(data, downloaded)
    if updated == data:
        return

    # Write next to the original and swap it in, so an interrupted run never
    # leaves a half-written note behind.
    fullTempFilePath = fileFullPath + '.tmp'
    with open(fullTempFilePath, 'w', **textMode) as temp_file:
        temp_file.write(updated)
    os.replace(fullTempFilePath, fileFullPath)


def main():
    """Walk every file under ``vaultDir`` and localize its Firebase links."""
    downloaded = {}
    for subdir, dirs, files in os.walk(vaultDir):
        for file in files:
            localize_file(os.path.join(subdir, file), downloaded)


if __name__ == '__main__':
    main()
Hi Nicole, thanks for making this and making it available.
I'm trying to download voice memos (audio files) with this script but I fear I'm out of my depth. I'm a very novice programmer. If this is fairly straightforward, I would love any tips. If it's more complicated, I'll get more experience before I attempt further. Thanks for your time.
Hi Nicole,
I was able to get the audio files downloaded but the links in the MD files aren't working for PDFs or audio files. Here's what I'm seeing:
{{pdf: assets/1686594009_3.pdf}
{{audio: assets/1686594007_0.wav}
Again, if this is a simple fix, great. If not, still appreciate your time.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thank you so much for this! Made a fork for anyone trying to download roam images and desiring filenames based on page titles (e.g. 'my-page-1.png' vs. 'timestampnum-1.png' ) : https://gist.github.com/seltzered/f93cd6dbe7db28ac820591e7ee14b820