@ozio
Last active August 9, 2024 14:33
elitebabes.com gallery download bash script
#!/bin/bash
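#
# Downloads image galleries from elitebabes.com: pass either a single album URL
# or a model page URL (https://www.elitebabes.com/model/...). For a model page it
# follows the "Next" pagination links and downloads every linked album into its
# own subdirectory. Requires curl, wget, and the usual grep/sed/awk/xargs tools.
#
# Usage: ./<script-name> [-v] <url>    (-v enables verbose debug output)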
# Set the host domain to search for images
image_host="https://cdn.elitebabes.com"
base_url="https://www.elitebabes.com"
# Verbose flag initialization
verbose=0
# Function to print debug messages when verbose mode is enabled
debug() {
  if [ $verbose -eq 1 ]; then
    echo "[DEBUG] $1"
  fi
}
# Function to download images from a webpage into a specified directory
download_images() {
url="$1"
target_dir="$2"
debug "Processing URL: $url"
# Extract the article handle from the URL (everything after the last '/')
handle=$(echo "$url" | sed -E 's|.*/([^/]+)/?$|\1|')
debug "Extracted handle: $handle"
# Ensure target directory exists
mkdir -p "$target_dir"
debug "Directory ensured: $target_dir"
# Create a .webloc file with the URL inside the target directory
webloc_file="$target_dir/link.webloc"
echo "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" > "$webloc_file"
echo "<!DOCTYPE plist PUBLIC \"-//Apple Computer//DTD PLIST 1.0//EN\" \"http://www.apple.com/DTDs/PropertyList-1.0.dtd\">" >> "$webloc_file"
echo "<plist version=\"1.0\">" >> "$webloc_file"
echo "<dict>" >> "$webloc_file"
echo " <key>URL</key>" >> "$webloc_file"
echo " <string>$url</string>" >> "$webloc_file"
echo "</dict>" >> "$webloc_file"
echo "</plist>" >> "$webloc_file"
debug ".webloc file created: $webloc_file"
# Download the HTML of the page
html_content=$(curl -s "$url")
debug "HTML content downloaded"
# Check if the page contains a photo gallery
if ! echo "$html_content" | grep -q '<ul class="list-gallery static css"'; then
debug "No gallery found on the page, skipping..."
return
fi
# Extract the relevant part of the HTML
relevant_content=$(echo "$html_content" | awk '/<ul class="list-gallery static css"/{flag=1;next}/<p class="link-d"/{flag=0}flag')
debug "Relevant HTML content extracted"
# Extract image URLs that are on the specified domain and inside href="..."
image_urls=$(echo "$relevant_content" | grep -Eo "href=\"${image_host//./\\.}[^\"']*\.(jpg|jpeg)\"" | sed -E 's/^href="([^"]+)"/\1/')
# Eliminate duplicate links
image_urls=$(echo "$image_urls" | sort -u)
debug "Image URLs found: $image_urls"
# Create a temporary file to store image URLs
temp_file=$(mktemp)
echo "$image_urls" > "$temp_file"
# Use xargs to download images concurrently
cat "$temp_file" | xargs -n 1 -P 4 -I {} wget -q --no-clobber --show-progress -P "$target_dir" {}
# Clean up temporary file
rm "$temp_file"
}
# Function to download all albums from a model page and handle pagination
download_model_albums() {
model_url="$1"
debug "Processing model URL: $model_url"
# Extract the model handle from the URL (everything after the last '/')
model_handle=$(echo "$model_url" | sed -E 's|.*/([^/]+)/?$|\1|')
debug "Extracted model handle: $model_handle"
# Create directory with the model handle name
mkdir -p "$model_handle"
debug "Directory created: $model_handle"
# Initialize next_url with the model_url
next_url="$model_url"
# Initialize a variable to store all album links
all_album_links=""
while [ ! -z "$next_url" ]; do
# Download the HTML of the model page
model_html_content=$(curl -s "$next_url")
debug "Model HTML content downloaded from: $next_url"
# Extract the relevant part of the HTML
relevant_content=$(echo "$model_html_content" | awk '/<ul class="list-gallery has-mobile-menu">/{flag=1;next}/<div class="double text-center">/{flag=0}flag')
debug "Relevant HTML content extracted"
# Extract album links, excluding links to other models
album_links=$(echo "$relevant_content" | grep -Eo "href=\"${base_url}/[^/]+/\"" | grep -vE "model/" | sed -E 's/^href="([^"]+)"/\1/')
# Accumulate all unique album links
all_album_links=$(echo -e "$all_album_links\n$album_links" | sort -u)
debug "Accumulated album links: $all_album_links"
# Check for the "Next" page link using grep and sed without using the -P option
next_url=$(echo "$model_html_content" | grep '<li class="next"><a href="' | sed -E 's/.*<li class="next"><a href="([^"]+)">Next<\/a><\/li>.*/\1/')
if [ ! -z "$next_url" ]; then
# Prepend the base URL if the next URL is relative
if [[ "$next_url" != http* ]]; then
next_url="${base_url}${next_url}"
fi
debug "Next page URL found: $next_url"
else
next_url=""
debug "No next page found, stopping."
fi
done
# Download images for each accumulated album link
for album_link in $all_album_links; do
# Extract the album handle
album_handle=$(echo "$album_link" | sed -E 's|.*/([^/]+)/?$|\1|')
album_dir="${model_handle}/${album_handle}"
# Create a directory for each album under the model's directory
mkdir -p "$album_dir"
debug "Directory created: $album_dir"
# Download images from each album into the specific directory
download_images "$album_link" "$album_dir"
done
}
# Parse command-line arguments
while getopts ":v" opt; do
  case $opt in
    v)
      verbose=1
      ;;
    \?)
      echo "Invalid option: -$OPTARG" >&2
      exit 1
      ;;
  esac
done
# Shift positional arguments after options are processed
shift $((OPTIND - 1))
# Ensure that a URL is provided as an argument
if [ -z "$1" ]; then
echo "Usage: $0 [-v] <url>"
exit 1
fi
# Determine if the URL is an album or model link
if [[ "$1" =~ ^${base_url}/model/ ]]; then
download_model_albums "$1"
else
download_images "$1" "$(basename "$1" | sed -E 's|.*/([^/]+)/?$|\1|')"
fi
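# Example invocations (a sketch: "download-gallery.sh" is just a placeholder for whatever
# filename you saved this script under, and the <handles> are placeholders, not real pages):
#   ./download-gallery.sh -v "https://www.elitebabes.com/model/<model-handle>/"   # all albums of a model
#   ./download-gallery.sh "https://www.elitebabes.com/<album-handle>/"            # a single album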