aayushdutt · June 20, 2025 11:07 · aayushdutt · Aug 19, 2023
diff --git a/scrape_feynman_lectures_playlist.sh b/scrape_feynman_lectures_playlist.sh
 #!/bin/bash

 # Maximum number of downloads allowed to run at the same time.
 # Defaults to 9; set NUM_CONCURRENT_DOWNLOADS in the environment to override
 # it without editing this script.
 num_concurrent_downloads="${NUM_CONCURRENT_DOWNLOADS:-9}"

 # Download a single URL to a local file with curl.
 # Arguments:
 #   $1 - URL to fetch
 #   $2 - path of the output file
 # Outputs:
 #   Progress messages on stdout; a failure message on stderr.
 # Returns:
 #   0 on success, otherwise curl's non-zero exit status.
 perform_curl() {
    local url=$1
    local filename=$2
    local status=0

    # Values are passed as printf arguments, never as the format string, so a
    # '%' or backslash in a URL or file name is printed literally.
    printf '\nDownloading: %s from %s\n' "$filename" "$url"

    # --fail makes curl exit non-zero on HTTP errors (>= 400) instead of saving
    # the server's error page as if it were the requested file.
    curl "$url" \
        --fail \
        -H 'Accept: */*' \
        -H 'Accept-Language: en-GB,en-US;q=0.9,en;q=0.8' \
        -H 'Connection: keep-alive' \
        -H 'DNT: 1' \
        -H 'Referer: https://www.feynmanlectures.caltech.edu/flptapes.html' \
        -H 'Sec-Fetch-Dest: audio' \
        -H 'Sec-Fetch-Mode: no-cors' \
        -H 'Sec-Fetch-Site: same-origin' \
        -H 'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36' \
        --compressed -o "$filename" || status=$?

    # Only report completion when curl actually succeeded.
    if (( status != 0 )); then
        printf '\nFAILED (curl exit %d): %s\n' "$status" "$filename" >&2
        return "$status"
    fi

    printf '\nCOMPLETE: %s\n' "$filename"
 }


 # Download every given path in parallel, keeping a bounded number of
 # transfers running at any one time.
 # Globals:
 #   num_concurrent_downloads (read) - maximum simultaneous downloads; treated
 #                                     as 1 when unset or not a positive integer
 # Arguments:
 #   $@ - paths appended to https://www.feynmanlectures.caltech.edu/protected
 # Outputs:
 #   A "Downloading N files" banner on stdout, plus whatever perform_curl prints.
 download_concurrently() {
    local urls=("$@")
    local num_urls=${#urls[@]}
    local limit=${num_concurrent_downloads:-1}
    local running=0
    local url_path
    # Kept local so the caller's variables are not overwritten.
    local filename

    # An empty, zero, or non-numeric limit would otherwise never match the
    # running-job counter and silently disable throttling.
    if ! [[ $limit =~ ^[1-9][0-9]*$ ]]; then
        limit=1
    fi

    printf 'Downloading %d files\n\n' "$num_urls"

    for url_path in "${urls[@]}"; do
        filename=$(basename -- "$url_path")
        # The file name is quoted so names containing spaces or glob characters
        # reach perform_curl as a single argument.
        perform_curl "https://www.feynmanlectures.caltech.edu/protected${url_path}" "$filename" &
        running=$(( running + 1 ))

        # At the limit: block until any one background job ends before
        # starting the next.
        if (( running >= limit )); then
            wait -n
            running=$(( running - 1 ))
        fi
    done

    # Wait for all remaining downloads to finish
    wait
 }


 # Read the "m4a" field of every entry in a JSON array file and hand the
 # resulting paths to download_concurrently.
 # Arguments:
 #   $1 - path to the JSON file
 # Returns:
 #   1 if the file cannot be read, jq's exit status if jq fails, otherwise the
 #   status of download_concurrently.
 parse_json() {
    local json_file=$1
    local jq_output
    local m4a_urls=()

    if [[ ! -r $json_file ]]; then
        printf 'parse_json: cannot read JSON file: %s\n' "$json_file" >&2
        return 1
    fi

    # Assigned separately from 'local' so a jq failure is not masked.
    # '// empty' skips entries whose m4a field is missing or null, which
    # 'jq -r' would otherwise print as the literal word "null".
    jq_output=$(jq -r '.[].m4a // empty' "$json_file") || return

    # One array element per output line: no word splitting or globbing, so
    # paths containing spaces or '*' stay intact.
    if [[ -n $jq_output ]]; then
        mapfile -t m4a_urls <<<"$jq_output"
    fi

    download_concurrently "${m4a_urls[@]}"
 }

 # NOTE: You can get the JSON object by running console.log(recordings) in the browser console on the page https://www.feynmanlectures.caltech.edu/flptapes.html
 # Save it to a file, then either pass that file's path as the first argument
 # to this script or change the default below to its path.
 json_file="${1:-source.json}"
 parse_json "$json_file"
	#!/bin/bash

	# Maximum number of downloads allowed to run at the same time.
	# Defaults to 9; set NUM_CONCURRENT_DOWNLOADS in the environment to override
	# it without editing this script.
	num_concurrent_downloads="${NUM_CONCURRENT_DOWNLOADS:-9}"

	# Download a single URL to a local file with curl.
	# Arguments:
	#   $1 - URL to fetch
	#   $2 - path of the output file
	# Outputs:
	#   Progress messages on stdout; a failure message on stderr.
	# Returns:
	#   0 on success, otherwise curl's non-zero exit status.
	perform_curl() {
	    local url=$1
	    local filename=$2
	    local status=0

	    # Values are passed as printf arguments, never as the format string, so a
	    # '%' or backslash in a URL or file name is printed literally.
	    printf '\nDownloading: %s from %s\n' "$filename" "$url"

	    # --fail makes curl exit non-zero on HTTP errors (>= 400) instead of saving
	    # the server's error page as if it were the requested file.
	    # The Accept header is the wildcard media type '*/*'.
	    curl "$url" \
	        --fail \
	        -H 'Accept: */*' \
	        -H 'Accept-Language: en-GB,en-US;q=0.9,en;q=0.8' \
	        -H 'Connection: keep-alive' \
	        -H 'DNT: 1' \
	        -H 'Referer: https://www.feynmanlectures.caltech.edu/flptapes.html' \
	        -H 'Sec-Fetch-Dest: audio' \
	        -H 'Sec-Fetch-Mode: no-cors' \
	        -H 'Sec-Fetch-Site: same-origin' \
	        -H 'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36' \
	        --compressed -o "$filename" || status=$?

	    # Only report completion when curl actually succeeded.
	    if (( status != 0 )); then
	        printf '\nFAILED (curl exit %d): %s\n' "$status" "$filename" >&2
	        return "$status"
	    fi

	    printf '\nCOMPLETE: %s\n' "$filename"
	}


	# Download every given path in parallel, keeping a bounded number of
	# transfers running at any one time.
	# Globals:
	#   num_concurrent_downloads (read) - maximum simultaneous downloads; treated
	#                                     as 1 when unset or not a positive integer
	# Arguments:
	#   $@ - paths appended to https://www.feynmanlectures.caltech.edu/protected
	# Outputs:
	#   A "Downloading N files" banner on stdout, plus whatever perform_curl prints.
	download_concurrently() {
	    local urls=("$@")
	    local num_urls=${#urls[@]}
	    local limit=${num_concurrent_downloads:-1}
	    local running=0
	    local url_path
	    # Kept local so the caller's variables are not overwritten.
	    local filename

	    # An empty, zero, or non-numeric limit would otherwise never match the
	    # running-job counter and silently disable throttling.
	    if ! [[ $limit =~ ^[1-9][0-9]*$ ]]; then
	        limit=1
	    fi

	    printf 'Downloading %d files\n\n' "$num_urls"

	    for url_path in "${urls[@]}"; do
	        filename=$(basename -- "$url_path")
	        # The file name is quoted so names containing spaces or glob characters
	        # reach perform_curl as a single argument.
	        perform_curl "https://www.feynmanlectures.caltech.edu/protected${url_path}" "$filename" &
	        running=$(( running + 1 ))

	        # At the limit: block until any one background job ends before
	        # starting the next.
	        if (( running >= limit )); then
	            wait -n
	            running=$(( running - 1 ))
	        fi
	    done

	    # Wait for all remaining downloads to finish
	    wait
	}


	# Read the "m4a" field of every entry in a JSON array file and hand the
	# resulting paths to download_concurrently.
	# Arguments:
	#   $1 - path to the JSON file
	# Returns:
	#   1 if the file cannot be read, jq's exit status if jq fails, otherwise the
	#   status of download_concurrently.
	parse_json() {
	    local json_file=$1
	    local jq_output
	    local m4a_urls=()

	    if [[ ! -r $json_file ]]; then
	        printf 'parse_json: cannot read JSON file: %s\n' "$json_file" >&2
	        return 1
	    fi

	    # Assigned separately from 'local' so a jq failure is not masked.
	    # '// empty' skips entries whose m4a field is missing or null, which
	    # 'jq -r' would otherwise print as the literal word "null".
	    jq_output=$(jq -r '.[].m4a // empty' "$json_file") || return

	    # One array element per output line: no word splitting or globbing, so
	    # paths containing spaces or '*' stay intact.
	    if [[ -n $jq_output ]]; then
	        mapfile -t m4a_urls <<<"$jq_output"
	    fi

	    download_concurrently "${m4a_urls[@]}"
	}

	# NOTE: You can get the JSON object by running console.log(recordings) in the browser console on the page https://www.feynmanlectures.caltech.edu/flptapes.html
	# Save it to a file, then either pass that file's path as the first argument
	# to this script or change the default below to its path.
	json_file="${1:-source.json}"
	parse_json "$json_file"