-
-
Save nicolevanderhoeven/b25c97a8a68ea97e7bdf5ee674fdaec4 to your computer and use it in GitHub Desktop.
# Opens files in directory, outputs firebase URLs to a file, downloads them, and replaces the links with a link to the new files.
# To use, replace PATH in the variable vaultDir with your vault's root directory.
# This automatically puts filenames in /assets - change the newFilePath variable if you want to change this
import calendar
import glob
import os
import re
import time

import requests
vaultDir = '/PATH'

# Characters that may appear inside a Firebase link. Whitespace, the closing
# delimiters of the surrounding markup ( `)` for Markdown links, `}` for
# {{pdf: ...}} / {{audio: ...}} blocks, `]`, `>` ) and quotes end the URL.
_URL_CHAR = r'[^\s)}\]<>"\']'

# group(0): the full download URL including its "?alt=...&token=..." query.
# group(1): the same URL without the query; its tail carries the file name.
firebaseLinkPattern = re.compile(
    r'(https://firebasestorage' + _URL_CHAR + r'*?)\?alt' + _URL_CHAR + r'*'
)


def find_firebase_links(text):
    """Return every distinct Firebase link found in *text*.

    The result is a list of ``(firebaseUrl, firebaseShort)`` tuples in order of
    first appearance, where ``firebaseUrl`` is the complete download URL and
    ``firebaseShort`` is that URL with the ``?alt=...`` query removed. A URL
    that occurs several times is reported once, so it is downloaded once.
    """
    links = []
    seen = set()
    for match in firebaseLinkPattern.finditer(text):
        firebaseUrl = match.group(0)
        if firebaseUrl not in seen:
            seen.add(firebaseUrl)
            links.append((firebaseUrl, match.group(1)))
    return links


def asset_extension(firebaseShort):
    """Return the file extension (with leading dot) of a query-less URL.

    Returns an empty string when the last path segment has no extension.
    """
    return os.path.splitext(firebaseShort)[1]


def download_asset(firebaseUrl, destination):
    """Download *firebaseUrl* and write the response body to *destination*.

    Raises ``requests.RequestException`` on network failures and on HTTP error
    statuses, so an error page is never saved in place of the real asset.
    """
    response = requests.get(firebaseUrl, timeout=60)
    response.raise_for_status()
    with open(destination, 'wb') as output_file:
        output_file.write(response.content)


def localize_file(fileFullPath, vaultDir, counter=0, download=None):
    """Download the Firebase assets linked from one file and relink them.

    Args:
        fileFullPath: path of the file to process.
        vaultDir: vault root; assets are stored relative to it.
        counter: running number used to keep asset file names unique.
        download: callable ``(url, destination)`` that stores the asset;
            defaults to ``download_asset``.

    Returns:
        The counter, advanced by the number of assets that were downloaded.

    The file is only rewritten when at least one link was replaced. Links whose
    download fails, or whose URL has no file extension, are left untouched.
    """
    if download is None:
        download = download_asset

    # 'surrogateescape' + newline='' round-trips every byte and line ending of
    # the file unchanged, whatever its real encoding is; only the links differ
    # in the rewritten file.
    with open(fileFullPath, encoding='utf-8', errors='surrogateescape',
              newline='') as fhand:
        # Cheap line-by-line scan first so unrelated files are never loaded
        # into memory as a whole.
        if not any('firebasestorage' in line for line in fhand):
            return counter
        fhand.seek(0)
        data = fhand.read()

    replacements = {}
    for firebaseUrl, firebaseShort in find_firebase_links(data):
        # Get file extension of file. Ex: .png; .jpeg
        ext = asset_extension(firebaseShort)
        if not ext:
            continue
        timestamp = calendar.timegm(time.gmtime())
        # Create new local file out of downloaded firebase file
        newFilePath = 'assets/' + str(timestamp) + '_' + str(counter) + ext
        destination = os.path.join(vaultDir, newFilePath)
        try:
            # Create the assets folder if it doesn't exist
            os.makedirs(os.path.dirname(destination), exist_ok=True)
            download(firebaseUrl, destination)
        except (requests.RequestException, OSError) as err:
            # Keep the original link so nothing is lost; move on to the next.
            print('Skipping ' + firebaseUrl + ': ' + str(err))
            continue
        replacements[firebaseUrl] = newFilePath
        counter += 1

    if replacements:
        # Substitute exact matches only, in a single pass over the text.
        data = firebaseLinkPattern.sub(
            lambda match: replacements.get(match.group(0), match.group(0)),
            data,
        )
        # Write a temp file next to the original, then swap it in. Every
        # handle is closed before os.replace, which Windows requires.
        fullTempFilePath = os.path.join(
            os.path.dirname(fileFullPath),
            'temp_' + os.path.basename(fileFullPath),
        )
        with open(fullTempFilePath, 'w', encoding='utf-8',
                  errors='surrogateescape', newline='') as temp_file:
            temp_file.write(data)
        os.replace(fullTempFilePath, fileFullPath)
    return counter


def migrate_vault(vaultDir, download=None):
    """Process every file below *vaultDir*; return the number of assets saved."""
    counter = 0
    # Walk through all files in all directories within the specified vault directory
    for subdir, dirs, files in os.walk(vaultDir):
        for file in files:
            counter = localize_file(
                os.path.join(subdir, file), vaultDir, counter, download
            )
    return counter


# Run the migration when the script is executed.
migrate_vault(vaultDir)
It worked for me, thanks!
Thank you so much for this! Made a fork for anyone trying to download roam images and desiring filenames based on page titles (e.g. 'my-page-1.png' vs. 'timestampnum-1.png' ) : https://gist.github.com/seltzered/f93cd6dbe7db28ac820591e7ee14b820
Hi Nicole, thanks for making this and making it available.
I'm trying to download voice memos (audio files) with this script but I fear I'm out of my depth. I'm a very novice programmer. If this is fairly straightforward, I would love any tips. If it's more complicated, I'll get more experience before I attempt further. Thanks for your time.
Hi Nicole,
I was able to get the audio files downloaded but the links in the MD files aren't working for PDFs or audio files. Here's what I'm seeing:
{{pdf: assets/1686594009_3.pdf}
{{audio: assets/1686594007_0.wav}
Again, if this is a simple fix, great. If not, still appreciate your time.
Thank you so much for this @nicolevanderhoeven ! Getting my firebase images over from Roam to Logseq has been killing me. I am a total noob, but I think I at least have the script running now.
Hoping for a bit of guidance.
When I run, it creates an assets folder and puts one image in there correctly but then stops.
Any ideas? (I might be doing something really obvious wrong.)
ERROR:
PermissionError: [WinError 5] Access is denied: '~/MyDirectory//temp_2021_07_31.md'