Skip to content

Instantly share code, notes, and snippets.

@eliascotto
Created March 29, 2025 07:58
Show Gist options
  • Select an option

  • Save eliascotto/b916672ed7e7e1dc10da17fecbe1f776 to your computer and use it in GitHub Desktop.

Select an option

Save eliascotto/b916672ed7e7e1dc10da17fecbe1f776 to your computer and use it in GitHub Desktop.
Instagram profile saver
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import requests
import os
# --- Configuration ---
USERNAME = ""  # Replace with your Instagram username
PASSWORD = ""  # Replace with your Instagram password
TARGET_PROFILE = ""  # Replace with the profile you want to download from
SAVE_FOLDER = ""  # Replace with your desired folder path
GECKODRIVER_PATH = ""  # Replace with your geckodriver path (Firefox driver, not chromedriver)

# Ensure the save folder exists; exist_ok avoids the check-then-create race.
os.makedirs(SAVE_FOLDER, exist_ok=True)

# Set up the Firefox WebDriver (shared by all functions below).
service = Service(GECKODRIVER_PATH)
driver = webdriver.Firefox(service=service)
def login_to_instagram():
    """Log in to Instagram using the configured USERNAME and PASSWORD.

    Uses the module-level ``driver``. Returns None; raises a selenium
    TimeoutException if the login form never appears.
    """
    driver.get("https://www.instagram.com/accounts/login/")

    # Wait explicitly for the form instead of a fixed sleep.
    username_field = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.NAME, "username"))
    )
    username_field.send_keys(USERNAME)

    password_field = driver.find_element(By.NAME, "password")
    password_field.send_keys(PASSWORD)

    # Wait until the submit button is actually clickable before clicking.
    login_button = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.XPATH, "//button[@type='submit']"))
    )
    login_button.click()

    # Give Instagram time to process the login and redirect.
    time.sleep(5)
def get_post_urls():
    """Scroll through TARGET_PROFILE's page and collect unique post URLs.

    Returns a list of post permalinks (hrefs containing "/p/"). Scrolls
    incrementally so Instagram's lazy loading keeps adding posts.
    """
    driver.get(f"https://www.instagram.com/{TARGET_PROFILE}/")
    time.sleep(3)  # Wait for the profile page to load

    post_urls = set()  # set de-duplicates links seen across scroll passes
    total_height = driver.execute_script("return document.body.scrollHeight")
    scroll_position = 0
    scroll_increment = 500  # Scroll by 500 pixels at a time (adjustable)

    while scroll_position < total_height:
        # Collect every post link currently present in the DOM.
        for elem in driver.find_elements(By.XPATH, "//a[@href]"):
            href = elem.get_attribute("href")
            if href and "/p/" in href:  # Filter for post URLs
                post_urls.add(href)

        # Scroll incrementally to mimic human scrolling and trigger loading.
        scroll_position += scroll_increment
        driver.execute_script(f"window.scrollTo(0, {scroll_position});")
        time.sleep(0.5)

        # Re-measure the page height in case new content extended it.
        time.sleep(2)
        total_height = driver.execute_script("return document.body.scrollHeight")

    print(f"Found {len(post_urls)} unique posts.")
    return list(post_urls)
def _best_srcset_entry(srcset):
    """Parse a srcset string and return (url, width) for the widest variant.

    Entries look like "<url> 640w, <url> 1080w". Returns (None, 0) when no
    width-qualified entry is found. May raise ValueError on a malformed
    width; the caller falls back to the plain src URL.
    """
    best_url, max_width = None, 0
    for source in (s.strip() for s in srcset.split(",")):
        parts = source.split(" ")
        if len(parts) >= 2 and parts[-1].endswith("w"):
            width = int(parts[-1][:-1])
            if width > max_width:
                max_width = width
                best_url = " ".join(parts[:-1])  # URL itself may contain spaces
    return best_url, max_width


def download_full_size_image(post_url, index):
    """Save the highest-resolution image of *post_url* as image_<index+1>.jpg.

    Prefers the widest srcset variant, falling back to the plain src URL.
    Errors are reported and swallowed so one bad post doesn't stop the run.
    """
    driver.get(post_url)
    time.sleep(2)  # Wait for the post page to load
    try:
        img_element = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, "//img[contains(@class, 'x5yr21d')]"))
        )
        img_url = img_element.get_attribute("src")
        srcset = img_element.get_attribute("srcset")
        # srcset may be None; guard before slicing for the log line.
        print(f"Processing {post_url}: src={img_url[:50]}..., srcset={(srcset or '')[:50]}...")

        if srcset:
            try:
                best_url, max_width = _best_srcset_entry(srcset)
                if best_url:
                    img_url = best_url
                    print(f"Using srcset URL with width {max_width}")
            except Exception as e:
                print(f"Failed to parse srcset: {e}, falling back to src")

        filename = f"image_{index + 1}.jpg"
        filepath = os.path.join(SAVE_FOLDER, filename)
        # Stream the download; the timeout stops a hung request from stalling
        # the whole run, and the context manager releases the connection.
        with requests.get(img_url, stream=True, timeout=30) as response:
            if response.status_code == 200:
                with open(filepath, "wb") as f:
                    for chunk in response.iter_content(1024):
                        f.write(chunk)
                print(f"Downloaded {filename} from {post_url}")
            else:
                print(f"Failed to download {filename} from {post_url} "
                      f"(status: {response.status_code})")
    except Exception as e:
        print(f"Error processing {post_url}: {e}")
def main():
    """Log in, collect post URLs from TARGET_PROFILE, and download each image.

    Always quits the shared driver on exit, success or failure.
    """
    try:
        print("Logging in to Instagram...")
        login_to_instagram()

        # Dismiss the "Save your login info?" prompt if it appears.
        try:
            not_now_button = WebDriverWait(driver, 5).until(
                EC.element_to_be_clickable((By.XPATH, "//button[text()='Not Now']"))
            )
            not_now_button.click()
        except Exception:  # narrowed from bare except: keep Ctrl-C working
            print("No 'Not Now' prompt found, proceeding...")

        print(f"Collecting post URLs from {TARGET_PROFILE}...")
        post_urls = get_post_urls()

        print(f"Downloading full-size images from {len(post_urls)} posts...")
        for i, url in enumerate(post_urls):
            download_full_size_image(url, i)
    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        driver.quit()
        print("Browser closed.")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment