Skip to content

Instantly share code, notes, and snippets.

@frenchcharly
Last active October 10, 2024 19:02
Show Gist options
  • Save frenchcharly/39f5975fe1d213d03859725b4f708a58 to your computer and use it in GitHub Desktop.
Save frenchcharly/39f5975fe1d213d03859725b4f708a58 to your computer and use it in GitHub Desktop.
Transform Bear Exported files & attachments into a directory based hierarchy of files (ie: Obsidian)

Usage

  1. Adjust the path variables to suit your needs (source_dir, dest_dir and, if needed, attachments_dir)
  2. Run python3 sort_bear_exported_files.py

Result

  • Loops through files in source_dir and copies them into dest_dir while preserving their properties
  • copies attachments into a specific folder (attachments_dir) and renames them to avoid name conflicts
  • recreates the tag hierarchy as nested directories (full path), based on the last 'valid' tag found inside the markdown file

Status

Work in progress. It fits my usage (migrating to Obsidian, macOS, etc.); feel free to use it, modify it, etc. Also, my knowledge of Python is rudimentary, so apologies for any blatant mistakes.

/wave

import os
import shutil
import re
# --- Paths -------------------------------------------------------------------
# Folder that holds the markdown files exported from Bear.
source_dir = os.path.expanduser("~/Desktop/Bear Export")
# Root of the sorted output tree.
dest_dir = os.path.expanduser("~/Desktop/Sorted Notes")
# Notes without a usable tag are collected here.
to_tag_dir = os.path.join(dest_dir, "to_tag")
# Single folder, at the root of the output tree, that receives all attachments.
attachments_dir = os.path.join(dest_dir, "attachments")

# Create the output folders up front; existing folders are left untouched.
for _required_dir in (dest_dir, to_tag_dir, attachments_dir):
    os.makedirs(_required_dir, exist_ok=True)
def sanitize_filename(filename):
    """Return *filename* with every character in ``:*?"<>|`` removed.

    The characters are deleted outright, not substituted; all other
    characters are returned unchanged and in their original order.
    """
    # A translation table that maps each forbidden character to None deletes
    # them all in a single pass over the string.
    removal_table = str.maketrans('', '', r':*?"<>|')
    return filename.translate(removal_table)
def extract_tag_path(file_path):
    """Return the last valid Bear-style tag found in a markdown file.

    The file is read as UTF-8. URLs (http, https, bear and chrome-extension
    schemes) are removed first so that ``#fragment`` parts are not mistaken
    for tags. A tag is a ``#`` at the very start of the text or right after
    whitespace, followed by word characters, ``-`` or ``/``. Tags made only
    of digits are ignored.

    Returns the tag text without the leading ``#`` (e.g. ``"work/project"``),
    or None when the file contains no valid tag.
    """
    with open(file_path, 'r', encoding='utf-8') as note:
        text = note.read()

    text_without_urls = re.sub(
        r'(?:(?:https?://|bear://|chrome-extension://)\S+)', '', text
    )

    # Each match is a (leading whitespace, tag) pair; only the tag matters.
    candidates = re.findall(r'(^|\s)#([\w\-/]+)', text_without_urls)

    # Scan backwards so the first acceptable candidate is the last in the file.
    for _leading, tag in reversed(candidates):
        if tag.isdigit():
            continue
        return tag.strip()
    return None
def copy_attachments(folder_name, markdown_file_path, source_root=None,
                     dest_root=None, attachments_root=None):
    """Copy a note's attachments into the shared attachments directory.

    Every regular file directly inside ``<source_root>/<folder_name>`` is
    copied with ``shutil.copy2`` into ``attachments_root`` and renamed to
    ``<prefix>-<original name>``. The prefix is ``markdown_file_path`` made
    relative to ``dest_root``, without its ``.md`` extension, with spaces
    replaced by ``_`` and path separators replaced by ``-``. Nothing is done
    when the attachment folder does not exist.

    Args:
        folder_name: Name of the attachment folder inside ``source_root``.
        markdown_file_path: Path of the note the attachments belong to; used
            only to build the filename prefix.
        source_root: Directory containing the attachment folder. Defaults to
            the module-level ``source_dir``.
        dest_root: Directory that ``markdown_file_path`` is made relative to.
            Defaults to the module-level ``dest_dir``.
        attachments_root: Directory receiving the copies. Defaults to the
            module-level ``attachments_dir``.
    """
    if source_root is None:
        source_root = source_dir
    if dest_root is None:
        dest_root = dest_dir
    if attachments_root is None:
        attachments_root = attachments_dir

    attachment_folder_path = os.path.join(source_root, folder_name)
    # isdir (not exists): a plain file with that name must not reach listdir.
    if not os.path.isdir(attachment_folder_path):
        return

    # Build the prefix once; it does not depend on the individual attachment.
    relative_path = markdown_file_path
    dest_prefix = os.path.join(dest_root, '')  # dest_root plus one trailing separator
    if relative_path.startswith(dest_prefix):
        relative_path = relative_path[len(dest_prefix):]
    # Drop only the trailing extension, never a ".md" inside the name.
    stem, extension = os.path.splitext(relative_path)
    if extension == '.md':
        relative_path = stem
    safe_path = relative_path.replace(' ', '_')
    for separator in {os.sep, '/'}:
        safe_path = safe_path.replace(separator, '-')

    for attachment in os.listdir(attachment_folder_path):
        attachment_path = os.path.join(attachment_folder_path, attachment)
        if not os.path.isfile(attachment_path):  # Skip sub-folders
            continue
        new_filename = f"{safe_path}-{attachment}"
        new_attachment_path = os.path.join(attachments_root, new_filename)
        shutil.copy2(attachment_path, new_attachment_path)
        print(f"Copied attachment: {attachment} to {new_attachment_path}")
# Walk the export folder and copy every markdown note into the directory
# named by its tag. Notes without a valid tag are copied to "to_tag".
# Attachments are copied for tagged and untagged notes alike, and the final
# message reports failures instead of always claiming success.
failed_count = 0
for root, dirs, files in os.walk(source_dir):
    for file in files:
        if not file.endswith(".md"):  # Process only markdown files
            continue
        file_path = os.path.join(root, file)
        try:
            # Extract the Bear-style tag path from the file
            tag_path = extract_tag_path(file_path)
            if tag_path:
                # The tag is lower-cased before it becomes a directory path,
                # so tags differing only in case share one directory.
                directory_path = os.path.join(dest_dir, *tag_path.lower().split('/'))
                os.makedirs(directory_path, exist_ok=True)
            else:
                # No valid tag: the note is copied (not moved) to "to_tag"
                directory_path = to_tag_dir
            # Copy the note, preserving metadata
            new_markdown_file_path = os.path.join(directory_path, file)
            shutil.copy2(file_path, new_markdown_file_path)
            # Attachments are looked up in a folder named after the note
            markdown_file_name_without_extension = os.path.splitext(file)[0]
            copy_attachments(markdown_file_name_without_extension, new_markdown_file_path)
        except Exception as e:
            # Keep going: one unreadable note should not stop the migration.
            failed_count += 1
            print(f"ERROR: Failed to process {file}: {e}")
if failed_count:
    print(f"Finished with errors: {failed_count} note(s) could not be processed.")
else:
    print("Notes copied successfully!")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment