@ozio
Last active August 9, 2024 14:33
elitebabes.com gallery download bash script
#!/bin/bash
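#
# Downloads image galleries from elitebabes.com: pass either a single album URL
# or a model page URL (https://www.elitebabes.com/model/...). For a model page it
# follows the "Next" pagination links and downloads every linked album into its
# own subdirectory. Requires curl, wget, and the usual grep/sed/awk/xargs tools.
#
# Usage: ./<script-name> [-v] <url>    (-v enables verbose debug output)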
# Set the host domain to search for images
image_host="https://cdn.elitebabes.com"
base_url="https://www.elitebabes.com"
# Verbose flag initialization
verbose=0
# Function to print debug messages when verbose mode is enabled
debug() {
  if [ $verbose -eq 1 ]; then
    echo "[DEBUG] $1"
  fi
}
# Function to download images from a webpage into a specified directory
download_images() {
url="$1"
target_dir="$2"
debug "Processing URL: $url"
# Extract the article handle from the URL (everything after the last '/')
handle=$(echo "$url" | sed -E 's|.*/([^/]+)/?$|\1|')
debug "Extracted handle: $handle"
# Ensure target directory exists
mkdir -p "$target_dir"
debug "Directory ensured: $target_dir"
# Create a .webloc file with the URL inside the target directory
webloc_file="$target_dir/link.webloc"
echo "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" > "$webloc_file"
echo "<!DOCTYPE plist PUBLIC \"-//Apple Computer//DTD PLIST 1.0//EN\" \"http://www.apple.com/DTDs/PropertyList-1.0.dtd\">" >> "$webloc_file"
echo "<plist version=\"1.0\">" >> "$webloc_file"
echo "<dict>" >> "$webloc_file"
echo " <key>URL</key>" >> "$webloc_file"
echo " <string>$url</string>" >> "$webloc_file"
echo "</dict>" >> "$webloc_file"
echo "</plist>" >> "$webloc_file"
debug ".webloc file created: $webloc_file"
# Download the HTML of the page
html_content=$(curl -s "$url")
debug "HTML content downloaded"
# Check if the page contains a photo gallery
if ! echo "$html_content" | grep -q '<ul class="list-gallery static css"'; then
debug "No gallery found on the page, skipping..."
return
fi
# Extract the relevant part of the HTML
relevant_content=$(echo "$html_content" | awk '/<ul class="list-gallery static css"/{flag=1;next}/<p class="link-d"/{flag=0}flag')
debug "Relevant HTML content extracted"
# Extract image URLs that are on the specified domain and inside href="..."
image_urls=$(echo "$relevant_content" | grep -Eo "href=\"${image_host//./\\.}[^\"']*\.(jpg|jpeg)\"" | sed -E 's/^href="([^"]+)"/\1/')
# Eliminate duplicate links
image_urls=$(echo "$image_urls" | sort -u)
debug "Image URLs found: $image_urls"
# Create a temporary file to store image URLs
temp_file=$(mktemp)
echo "$image_urls" > "$temp_file"
# Use xargs to download images concurrently
cat "$temp_file" | xargs -n 1 -P 4 -I {} wget -q --no-clobber --show-progress -P "$target_dir" {}
# Clean up temporary file
rm "$temp_file"
}
# Function to download all albums from a model page and handle pagination
download_model_albums() {
model_url="$1"
debug "Processing model URL: $model_url"
# Extract the model handle from the URL (everything after the last '/')
model_handle=$(echo "$model_url" | sed -E 's|.*/([^/]+)/?$|\1|')
debug "Extracted model handle: $model_handle"
# Create directory with the model handle name
mkdir -p "$model_handle"
debug "Directory created: $model_handle"
# Initialize next_url with the model_url
next_url="$model_url"
# Initialize a variable to store all album links
all_album_links=""
while [ ! -z "$next_url" ]; do
# Download the HTML of the model page
model_html_content=$(curl -s "$next_url")
debug "Model HTML content downloaded from: $next_url"
# Extract the relevant part of the HTML
relevant_content=$(echo "$model_html_content" | awk '/<ul class="list-gallery has-mobile-menu">/{flag=1;next}/<div class="double text-center">/{flag=0}flag')
debug "Relevant HTML content extracted"
# Extract album links, excluding links to other models
album_links=$(echo "$relevant_content" | grep -Eo "href=\"${base_url}/[^/]+/\"" | grep -vE "model/" | sed -E 's/^href="([^"]+)"/\1/')
# Accumulate all unique album links
all_album_links=$(echo -e "$all_album_links\n$album_links" | sort -u)
debug "Accumulated album links: $all_album_links"
# Check for the "Next" page link using grep and sed without using the -P option
next_url=$(echo "$model_html_content" | grep '<li class="next"><a href="' | sed -E 's/.*<li class="next"><a href="([^"]+)">Next<\/a><\/li>.*/\1/')
if [ ! -z "$next_url" ]; then
# Prepend the base URL if the next URL is relative
if [[ "$next_url" != http* ]]; then
next_url="${base_url}${next_url}"
fi
debug "Next page URL found: $next_url"
else
next_url=""
debug "No next page found, stopping."
fi
done
# Download images for each accumulated album link
for album_link in $all_album_links; do
# Extract the album handle
album_handle=$(echo "$album_link" | sed -E 's|.*/([^/]+)/?$|\1|')
album_dir="${model_handle}/${album_handle}"
# Create a directory for each album under the model's directory
mkdir -p "$album_dir"
debug "Directory created: $album_dir"
# Download images from each album into the specific directory
download_images "$album_link" "$album_dir"
done
}
# Parse command-line arguments
while getopts ":v" opt; do
  case $opt in
    v)
      verbose=1
      ;;
    \?)
      echo "Invalid option: -$OPTARG" >&2
      exit 1
      ;;
  esac
done
# Shift positional arguments after options are processed
shift $((OPTIND - 1))
# Ensure that a URL is provided as an argument
if [ -z "$1" ]; then
echo "Usage: $0 [-v] <url>"
exit 1
fi
# Determine if the URL is an album or model link
if [[ "$1" =~ ^${base_url}/model/ ]]; then
download_model_albums "$1"
else
download_images "$1" "$(basename "$1" | sed -E 's|.*/([^/]+)/?$|\1|')"
fi
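# Example invocations (a sketch: "download-gallery.sh" is just a placeholder for whatever
# filename you saved this script under, and the <handles> are placeholders, not real pages):
#   ./download-gallery.sh -v "https://www.elitebabes.com/model/<model-handle>/"   # all albums of a model
#   ./download-gallery.sh "https://www.elitebabes.com/<album-handle>/"            # a single album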