Skip to content

Instantly share code, notes, and snippets.

@ChronoMonochrome
Last active March 20, 2025 19:02
Show Gist options
  • Save ChronoMonochrome/4bbab1933816d0dc3803926d567e9623 to your computer and use it in GitHub Desktop.
Save ChronoMonochrome/4bbab1933816d0dc3803926d567e9623 to your computer and use it in GitHub Desktop.
Python script to download songs from SoundCloud
import base64
import os
import subprocess
import sys

import bs4
import requests
from pathvalidate import sanitize_filename
def load_urls(filename):
    """Read *filename* and return its contents as a list of lines.

    Whitespace at the very start and end of the file is trimmed before the
    text is split on newline characters. Blank lines in the middle of the
    file are kept as empty strings, and an empty file yields ``['']``.

    Args:
        filename: Path of the text file holding one URL per line.

    Returns:
        A list of strings, one per line of the trimmed file contents.
    """
    with open(filename, 'r') as handle:
        raw_text = handle.read()
    # Trim the whole text (not each line) so only the outer padding is lost.
    trimmed = raw_text.strip()
    return trimmed.split('\n')
def get_audio_codec(filename):
    """Return the codec name of the first audio stream in *filename*.

    Runs ``ffprobe`` restricted to stream ``a:0`` and asks it to print only
    the ``codec_name`` value, then returns that output with surrounding
    whitespace removed.

    Args:
        filename: Path of the media file to inspect.

    Returns:
        The text printed by ffprobe, stripped; an empty string if ffprobe
        printed nothing.

    Raises:
        subprocess.CalledProcessError: If ffprobe exits with a non-zero
            status.
        FileNotFoundError: If the ``ffprobe`` executable cannot be found.
    """
    command = [
        'ffprobe',
        '-v', 'error',
        '-select_streams', 'a:0',
        '-show_entries', 'stream=codec_name',
        '-of', 'default=noprint_wrappers=1:nokey=1',
        filename,
    ]
    # An argument list (no shell) hands the filename to ffprobe verbatim, so
    # quotes, spaces or shell metacharacters in the name can neither break
    # the command nor be executed.
    return subprocess.check_output(command).decode().strip()
def create_default_artwork_image(filename):
    """Write the built-in placeholder cover image to *filename*.

    The image is a small JPEG embedded below as base64 text, so no external
    asset is required. Needs the ``base64`` module to be imported at the top
    of the file.

    Args:
        filename: Destination path; an existing file is overwritten.
    """
    # Base64 of a minimal placeholder JPEG (described by the original author
    # as a black image). Its frame header declares 16x16 pixels, not the
    # 512x512 the earlier comment claimed.
    base64_image = (
        '''/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL/wAARCAAQABADASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwD5/ooooA//2Q=='''
    )
    image_data = base64.b64decode(base64_image)
    with open(filename, 'wb') as f:
        f.write(image_data)
def _run_tool(args):
    """Run an external command given as an argument list; return True on exit status 0."""
    try:
        completed = subprocess.run(args)
    except FileNotFoundError:
        print(f"Command not found: {args[0]}")
        return False
    if completed.returncode != 0:
        print(f"Command {args[0]} failed with exit code {completed.returncode}")
        return False
    return True


def _fetch_artwork(soup):
    """Return a local artwork file name for the parsed page, downloading the image if needed."""
    default_name = 'default_artwork.jpg'
    tag = soup.find('meta', {'property': 'twitter:image'})
    image_url = tag.get('content') if tag is not None else None
    if not image_url:
        return default_name
    # The last path component of the image URL doubles as the local cache name.
    name = os.path.split(image_url)[-1]
    if not name:
        # URL ends with '/': there is nothing usable as a file name.
        return default_name
    if not os.path.exists(name):
        response = requests.get(image_url, timeout=30)
        if not response.ok:
            # Do not store an HTTP error body as if it were an image.
            return default_name
        with open(name, 'wb') as image_file:
            image_file.write(response.content)
    return name


def download(url):
    """Download the track at *url* and write an MP3 with embedded cover art.

    Steps:
      1. Fetch the page and read the track id (``al:android:url`` meta tag)
         and the track name (``<a itemprop="url">``).
      2. Skip the track if ``<name> [<id>]_1.mp3`` already exists.
      3. Obtain the cover image named by the ``twitter:image`` meta tag, or
         fall back to ``default_artwork.jpg`` (created on demand).
      4. Download the media with ``yt-dlp`` into ``<name> [<id>].mp3``.
      5. Re-encode with ``ffmpeg``/libmp3lame when the detected codec is
         ``opus`` or ``ogg``.
      6. Mux audio and cover into the final ``..._1.mp3`` with ``ffmpeg``.

    External tools are invoked with argument lists (no shell), so the URL
    and file names are never interpreted by a shell. Problems with the page
    or a failing tool are reported with ``print`` and the track is skipped.

    Args:
        url: Address of the track page.

    Returns:
        None.

    Raises:
        requests.RequestException: On network errors or timeouts.
        subprocess.CalledProcessError: If ``get_audio_codec`` fails on the
            downloaded file.
    """
    print(f"Starting download for {url}")
    page = requests.get(url, timeout=30)
    if not page.ok:
        print(f"Could not fetch {url} (HTTP {page.status_code}). Skipping.")
        return
    soup = bs4.BeautifulSoup(page.content, 'html.parser')

    id_tag = soup.find('meta', {'property': 'al:android:url'})
    name_tag = soup.find('a', {'itemprop': 'url'})
    id_content = id_tag.get('content') if id_tag is not None else None
    if not id_content or name_tag is None:
        print(f"Could not find track metadata on {url}. Skipping.")
        return
    # The id is whatever follows the last ':' in the meta tag's content.
    soundtrack_id = id_content.split(':')[-1]
    soundtrack_name = name_tag.text

    # Apostrophes are still removed so names match files produced by earlier
    # runs (needed for the "already exists" check below).
    soundtrack_filename = sanitize_filename(f'{soundtrack_name} [{soundtrack_id}].mp3').replace('\'', '')
    out_filename = f'{os.path.splitext(soundtrack_filename)[0]}_1.mp3'
    if os.path.exists(out_filename):
        print(f"File {out_filename} already exists. Skipping.")
        return

    artwork_filename = _fetch_artwork(soup)
    # Check if the artwork file exists, if not create the default image
    if not os.path.exists(artwork_filename):
        create_default_artwork_image(artwork_filename)

    downloaded = _run_tool([
        'yt-dlp',
        '-f', 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best',
        url,
        # -o takes an output *template*; double any '%' so a literal percent
        # sign in the title is not parsed as a template field.
        '-o', soundtrack_filename.replace('%', '%%'),
    ])
    if not downloaded:
        print(f"Download failed for {url}")
        return

    codec = get_audio_codec(soundtrack_filename)
    # NOTE(review): 'ogg' is a container name; ffprobe presumably reports
    # Vorbis audio as 'vorbis' - confirm whether that should be listed too.
    if codec in ('opus', 'ogg'):
        temp_filename = f'converted_{soundtrack_filename}'
        converted = _run_tool([
            'ffmpeg', '-i', soundtrack_filename,
            '-codec:a', 'libmp3lame', '-qscale:a', '2',
            temp_filename,
        ])
        if not converted:
            print(f"Conversion failed for {url}")
            return
        audio_file = temp_filename
    else:
        audio_file = soundtrack_filename

    muxed = _run_tool([
        'ffmpeg', '-i', audio_file, '-i', artwork_filename,
        '-map', '0:0', '-map', '1:0',
        '-c', 'copy', '-id3v2_version', '3',
        '-metadata:s:v', 'title=Album cover',
        '-metadata:s:v', 'comment=Cover (front)',
        out_filename,
    ])
    if not muxed:
        print(f"Could not write {out_filename} for {url}")
        return
    print(f"Finished download for {url}")
if __name__ == "__main__":
    # The links file may be passed as the first command-line argument; the
    # historical default is kept so running the script bare still works.
    links_file = sys.argv[1] if len(sys.argv) > 1 else 'extracted_links.txt'
    failed_urls = []
    for url in load_urls(links_file):
        # ''.startswith('http') is False, so blank lines are rejected here too.
        if not url.startswith('http'):
            print(f'incorrect url: {url}')
            continue
        try:
            download(url)
        except Exception as exc:
            # Top-level boundary: report the failure and carry on with the
            # remaining URLs instead of aborting the whole batch.
            print(f'Failed to download {url}: {exc}')
            failed_urls.append(url)
    if failed_urls:
        # Keep a non-zero exit status when at least one download raised.
        sys.exit(1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment