elitebabes.com gallery download bash script
#!/bin/bash
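
# Downloads every image from a gallery page, or every album linked from a
# model page (following pagination), into per-album directories.
# Usage: ./<script> [-v] <url>   (-v enables debug output)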

# Set the host domain to search for images
image_host="https://cdn.elitebabes.com"
base_url="https://www.elitebabes.com"

# Verbose flag initialization
verbose=0

# Function to print debug messages when verbose mode is enabled
debug() {
  if [ "$verbose" -eq 1 ]; then
    echo "[DEBUG] $1"
  fi
}

# Function to download images from a webpage into a specified directory
download_images() {
  url="$1"
  target_dir="$2"
  debug "Processing URL: $url"

  # Extract the article handle from the URL (everything after the last '/')
  handle=$(echo "$url" | sed -E 's|.*/([^/]+)/?$|\1|')
  debug "Extracted handle: $handle"

  # Ensure target directory exists
  mkdir -p "$target_dir"
  debug "Directory ensured: $target_dir"

  # Create a .webloc file with the URL inside the target directory
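  # (.webloc is the macOS internet-location format, a small XML plist;
  # it links each downloaded album back to its source page)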
  webloc_file="$target_dir/link.webloc"
  cat > "$webloc_file" <<EOF
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
 <key>URL</key>
 <string>$url</string>
</dict>
</plist>
EOF
  debug ".webloc file created: $webloc_file"

  # Download the HTML of the page
  html_content=$(curl -s "$url")
  debug "HTML content downloaded"

  # Check if the page contains a photo gallery
  if ! echo "$html_content" | grep -q '<ul class="list-gallery static css"'; then
    debug "No gallery found on the page, skipping..."
    return
  fi

  # Extract the relevant part of the HTML
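  # (awk sets flag=1 at the opening gallery <ul> tag and flag=0 at the
  # element that follows the list, printing only the lines in between)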
  relevant_content=$(echo "$html_content" | awk '/<ul class="list-gallery static css"/{flag=1;next}/<p class="link-d"/{flag=0}flag')
  debug "Relevant HTML content extracted"

  # Extract image URLs that are on the specified domain and inside href="..."
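  # (${image_host//./\\.} escapes the dots in the host so they match
  # literally in the extended regex rather than as wildcards)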
  image_urls=$(echo "$relevant_content" | grep -Eo "href=\"${image_host//./\\.}[^\"']*\.(jpg|jpeg)\"" | sed -E 's/^href="([^"]+)"/\1/')

  # Eliminate duplicate links
  image_urls=$(echo "$image_urls" | sort -u)
  debug "Image URLs found: $image_urls"

  # Create a temporary file to store image URLs
  temp_file=$(mktemp)
  echo "$image_urls" > "$temp_file"

  # Use xargs to download images concurrently
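  # (-n 1 and -I {} pass one URL per wget invocation; -P 4 keeps up to
  # four downloads running in parallel)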
cat "$temp_file" | xargs -n 1 -P 4 -I {} wget -q --no-clobber --show-progress -P "$target_dir" {} | |
# Clean up temporary file | |
rm "$temp_file" | |
} | |
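
# Example call (URL and handle are illustrative):
#   download_images "https://www.elitebabes.com/some-gallery/" "some-gallery"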

# Function to download all albums from a model page and handle pagination
download_model_albums() {
  model_url="$1"
  debug "Processing model URL: $model_url"

  # Extract the model handle from the URL (everything after the last '/')
  model_handle=$(echo "$model_url" | sed -E 's|.*/([^/]+)/?$|\1|')
  debug "Extracted model handle: $model_handle"

  # Create directory with the model handle name
  mkdir -p "$model_handle"
  debug "Directory created: $model_handle"

  # Initialize next_url with the model_url
  next_url="$model_url"

  # Initialize a variable to store all album links
  all_album_links=""

  while [ ! -z "$next_url" ]; do
    # Download the HTML of the model page
    model_html_content=$(curl -s "$next_url")
    debug "Model HTML content downloaded from: $next_url"

    # Extract the relevant part of the HTML
    relevant_content=$(echo "$model_html_content" | awk '/<ul class="list-gallery has-mobile-menu">/{flag=1;next}/<div class="double text-center">/{flag=0}flag')
    debug "Relevant HTML content extracted"

    # Extract album links, excluding links to other models
    album_links=$(echo "$relevant_content" | grep -Eo "href=\"${base_url}/[^/]+/\"" | grep -vE "model/" | sed -E 's/^href="([^"]+)"/\1/')

    # Accumulate all unique album links
    all_album_links=$(echo -e "$all_album_links\n$album_links" | sort -u)
    debug "Accumulated album links: $all_album_links"

    # Check for the "Next" page link using grep and sed (avoiding grep -P for portability)
    next_url=$(echo "$model_html_content" | grep '<li class="next"><a href="' | sed -E 's/.*<li class="next"><a href="([^"]+)">Next<\/a><\/li>.*/\1/')
    if [ ! -z "$next_url" ]; then
      # Prepend the base URL if the next URL is relative
      if [[ "$next_url" != http* ]]; then
        next_url="${base_url}${next_url}"
      fi
      debug "Next page URL found: $next_url"
    else
      next_url=""
      debug "No next page found, stopping."
    fi
  done

  # Download images for each accumulated album link
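  # (the unquoted $all_album_links relies on word splitting to iterate the
  # newline-separated list; safe here because URLs contain no whitespace)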
  for album_link in $all_album_links; do
    # Extract the album handle
    album_handle=$(echo "$album_link" | sed -E 's|.*/([^/]+)/?$|\1|')
    album_dir="${model_handle}/${album_handle}"

    # Create a directory for each album under the model's directory
    mkdir -p "$album_dir"
    debug "Directory created: $album_dir"

    # Download images from each album into the specific directory
    download_images "$album_link" "$album_dir"
  done
}

# Parse command-line arguments
while getopts ":v" opt; do
  case $opt in
    v)
      verbose=1
      ;;
    \?)
      echo "Invalid option: -$OPTARG" >&2
      exit 1
      ;;
  esac
done

# Shift positional arguments after options are processed
shift $((OPTIND - 1))

# Ensure that a URL is provided as an argument
if [ -z "$1" ]; then
  echo "Usage: $0 [-v] <url>"
  exit 1
fi

# Determine if the URL is an album or model link
if [[ "$1" =~ ^${base_url}/model/ ]]; then
  download_model_albums "$1"
else
  download_images "$1" "$(echo "$1" | sed -E 's|.*/([^/]+)/?$|\1|')"
fi
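
# Example invocations (script filename and URLs are illustrative):
#   ./elitebabes-dl.sh -v "https://www.elitebabes.com/model/some-model/"   # all albums for a model
#   ./elitebabes-dl.sh "https://www.elitebabes.com/some-gallery/"          # a single gallery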