Last active
March 20, 2025 19:02
-
-
Save ChronoMonochrome/4bbab1933816d0dc3803926d567e9623 to your computer and use it in GitHub Desktop.
Python script to download songs from SoundCloud
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import sys | |
import requests | |
import bs4 | |
import subprocess | |
from pathvalidate import sanitize_filename | |
def load_urls(filename):
    """Read a newline-separated list of URLs from a text file.

    Args:
        filename: Path of the text file holding one URL per line.

    Returns:
        A list with one string per line of the file. Leading/trailing blank
        lines of the file are dropped and every remaining line has its
        surrounding whitespace removed, so an indented URL or one followed
        by stray spaces is still usable by the caller. Blank lines in the
        middle of the file are kept as empty strings; an empty file yields
        an empty list.
    """
    with open(filename, 'r') as file:
        lines = file.read().strip().splitlines()
    # Strip per line: the caller validates with startswith('http'), which
    # would reject a URL that merely has leading whitespace.
    return [line.strip() for line in lines]
def get_audio_codec(filename):
    """Return the codec name of the first audio stream of a media file.

    Runs ``ffprobe`` on ``filename`` and returns its output with surrounding
    whitespace removed.

    Args:
        filename: Path of the media file to inspect.

    Returns:
        The text printed by ffprobe for ``stream=codec_name`` of stream
        ``a:0``, stripped.

    Raises:
        subprocess.CalledProcessError: If ffprobe exits with a non-zero status.
        FileNotFoundError: If the ``ffprobe`` executable cannot be found.
    """
    # Pass the arguments as a list (no shell) so that quotes, spaces or
    # shell metacharacters in the filename cannot break or alter the command.
    command = [
        'ffprobe',
        '-v', 'error',
        '-select_streams', 'a:0',
        '-show_entries', 'stream=codec_name',
        '-of', 'default=noprint_wrappers=1:nokey=1',
        filename,
    ]
    codec = subprocess.check_output(command).decode().strip()
    return codec
def create_default_artwork_image(filename):
    """Write the built-in placeholder cover image to ``filename``.

    The image is embedded below as base64-encoded JPEG data and is decoded
    and written in binary mode, overwriting any existing file.

    Args:
        filename: Path of the JPEG file to create.
    """
    # Imported locally: the module-level imports do not include base64.
    import base64

    # Base64 string for a small placeholder JPEG image (the embedded JPEG
    # frame header declares 16x16 pixels).
    base64_image = (
        '''/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL/wAARCAAQABADASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwD5/ooooA//2Q=='''
    )
    image_data = base64.b64decode(base64_image)
    with open(filename, 'wb') as f:
        f.write(image_data)
def _run_tool(args):
    """Run an external command without a shell and report a non-zero exit."""
    result = subprocess.run(args)
    if result.returncode != 0:
        print(f"Command {args[0]} exited with status {result.returncode}")
    return result.returncode


def download(url):
    """Download one track page as an MP3 with embedded cover art.

    Steps:
      1. Fetch the page and read the track id, the track name and the
         artwork URL from its HTML.
      2. Skip the track if the final output file already exists.
      3. Fetch the artwork (or create the default placeholder image).
      4. Download the media with ``yt-dlp``; if ffprobe reports the audio
         codec as ``opus`` or ``ogg``, re-encode it to MP3 with ``ffmpeg``.
      5. Mux audio and artwork into ``<name> [<id>]_1.mp3`` with ``ffmpeg``.

    All files are written to the current working directory. External tools
    are invoked with argument lists (no shell), so characters such as ``&``
    in the URL or quotes in file names are passed through literally.

    Args:
        url: Address of the track page to download.

    Returns:
        None. Progress and problems are reported on stdout.
    """
    print(f"Starting download for {url}")
    page = requests.get(url)
    soup = bs4.BeautifulSoup(page.content, 'html.parser')

    id_tag = soup.find('meta', {'property': 'al:android:url'})
    name_tag = soup.find('a', {'itemprop': 'url'})
    id_content = id_tag.get('content') if id_tag is not None else None
    if not id_content or name_tag is None:
        # Without these tags neither the id nor the name can be derived;
        # skip this URL instead of failing with an AttributeError.
        print(f"Could not find track metadata on {url}. Skipping.")
        return

    soundtrack_id = id_content.split(':')[-1]
    soundtrack_name = name_tag.text
    # Apostrophes are still removed so that file names stay identical to the
    # ones produced by earlier runs (needed for the "already exists" check).
    soundtrack_filename = sanitize_filename(f'{soundtrack_name} [{soundtrack_id}].mp3').replace('\'', '')
    out_filename = f'{os.path.splitext(soundtrack_filename)[0]}_1.mp3'
    if os.path.exists(out_filename):
        print(f"File {out_filename} already exists. Skipping.")
        return

    # Use the page's artwork when it advertises one, else the placeholder.
    artwork_filename = 'default_artwork.jpg'
    artwork_img_tag = soup.find('meta', {'property': 'twitter:image'})
    artwork_img_url = artwork_img_tag.get('content') if artwork_img_tag is not None else None
    if artwork_img_url:
        # An artwork URL ending in '/' has no file name part; keep the default.
        remote_name = os.path.split(artwork_img_url)[-1]
        if remote_name:
            artwork_filename = remote_name
            if not os.path.exists(artwork_filename):
                artwork_img = requests.get(artwork_img_url)
                with open(artwork_filename, 'wb') as artwork_file:
                    artwork_file.write(artwork_img.content)

    # Check if default artwork file exists, if not create it
    if not os.path.exists(artwork_filename):
        create_default_artwork_image(artwork_filename)

    _run_tool([
        'yt-dlp',
        '-f', 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best',
        url,
        '-o', soundtrack_filename,
    ])

    codec = get_audio_codec(soundtrack_filename)
    if codec in ['opus', 'ogg']:
        # The final step stream-copies the audio, so re-encode to MP3 first.
        temp_filename = f'converted_{soundtrack_filename}'
        _run_tool([
            'ffmpeg',
            '-i', soundtrack_filename,
            '-codec:a', 'libmp3lame',
            '-qscale:a', '2',
            temp_filename,
        ])
        audio_file = temp_filename
    else:
        audio_file = soundtrack_filename

    # Copy the audio stream and attach the image as the front cover.
    _run_tool([
        'ffmpeg',
        '-i', audio_file,
        '-i', artwork_filename,
        '-map', '0:0',
        '-map', '1:0',
        '-c', 'copy',
        '-id3v2_version', '3',
        '-metadata:s:v', 'title=Album cover',
        '-metadata:s:v', 'comment=Cover (front)',
        out_filename,
    ])
    print(f"Finished download for {url}")
if __name__ == "__main__":
    # Script entry point: download every URL listed in the links file.
    # The file defaults to 'extracted_links.txt' in the current directory;
    # an optional first command-line argument selects a different file.
    links_file = sys.argv[1] if len(sys.argv) > 1 else 'extracted_links.txt'
    for url in load_urls(links_file):
        # An empty string also fails startswith('http'), so blank lines are
        # reported here as well.
        if not url.startswith('http'):
            print(f'incorrect url: {url}')
            continue
        download(url)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment