Skip to content

Instantly share code, notes, and snippets.

@eliascotto
Created March 29, 2025 07:58
Show Gist options
  • Select an option

  • Save eliascotto/b916672ed7e7e1dc10da17fecbe1f776 to your computer and use it in GitHub Desktop.

Select an option

Save eliascotto/b916672ed7e7e1dc10da17fecbe1f776 to your computer and use it in GitHub Desktop.
Instagram profile saver
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import requests
import os
# --- Configuration ---
USERNAME = ""  # Replace with your Instagram username
PASSWORD = ""  # Replace with your Instagram password
TARGET_PROFILE = ""  # Replace with the profile you want to download from
SAVE_FOLDER = ""  # Replace with your desired folder path
GECKODRIVER_PATH = ""  # Replace with your geckodriver path (Firefox driver, not chromedriver)

# Ensure the save folder exists; exist_ok avoids the check-then-create race.
os.makedirs(SAVE_FOLDER, exist_ok=True)

# Set up the Firefox WebDriver (shared by all functions below).
service = Service(GECKODRIVER_PATH)
driver = webdriver.Firefox(service=service)
def login_to_instagram():
    """Log in to Instagram using the configured USERNAME and PASSWORD.

    Uses the module-level ``driver``. Returns None; raises a selenium
    TimeoutException if the login form never appears.
    """
    driver.get("https://www.instagram.com/accounts/login/")

    # Wait explicitly for the form instead of a fixed sleep.
    username_field = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.NAME, "username"))
    )
    username_field.send_keys(USERNAME)

    password_field = driver.find_element(By.NAME, "password")
    password_field.send_keys(PASSWORD)

    # Wait until the submit button is actually clickable before clicking.
    login_button = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.XPATH, "//button[@type='submit']"))
    )
    login_button.click()

    # Give Instagram time to process the login and redirect.
    time.sleep(5)
def get_post_urls():
    """Scroll through TARGET_PROFILE's page and collect unique post URLs.

    Returns a list of post permalinks (hrefs containing "/p/"). Scrolls
    incrementally so Instagram's lazy loading keeps adding posts.
    """
    driver.get(f"https://www.instagram.com/{TARGET_PROFILE}/")
    time.sleep(3)  # Wait for the profile page to load

    post_urls = set()  # set de-duplicates links seen across scroll passes
    total_height = driver.execute_script("return document.body.scrollHeight")
    scroll_position = 0
    scroll_increment = 500  # Scroll by 500 pixels at a time (adjustable)

    while scroll_position < total_height:
        # Collect every post link currently present in the DOM.
        for elem in driver.find_elements(By.XPATH, "//a[@href]"):
            href = elem.get_attribute("href")
            if href and "/p/" in href:  # Filter for post URLs
                post_urls.add(href)

        # Scroll incrementally to mimic human scrolling and trigger loading.
        scroll_position += scroll_increment
        driver.execute_script(f"window.scrollTo(0, {scroll_position});")
        time.sleep(0.5)

        # Re-measure the page height in case new content extended it.
        time.sleep(2)
        total_height = driver.execute_script("return document.body.scrollHeight")

    print(f"Found {len(post_urls)} unique posts.")
    return list(post_urls)
def _best_srcset_entry(srcset):
    """Parse a srcset string and return (url, width) for the widest variant.

    Entries look like "<url> 640w, <url> 1080w". Returns (None, 0) when no
    width-qualified entry is found. May raise ValueError on a malformed
    width; the caller falls back to the plain src URL.
    """
    best_url, max_width = None, 0
    for source in (s.strip() for s in srcset.split(",")):
        parts = source.split(" ")
        if len(parts) >= 2 and parts[-1].endswith("w"):
            width = int(parts[-1][:-1])
            if width > max_width:
                max_width = width
                best_url = " ".join(parts[:-1])  # URL itself may contain spaces
    return best_url, max_width


def download_full_size_image(post_url, index):
    """Save the highest-resolution image of *post_url* as image_<index+1>.jpg.

    Prefers the widest srcset variant, falling back to the plain src URL.
    Errors are reported and swallowed so one bad post doesn't stop the run.
    """
    driver.get(post_url)
    time.sleep(2)  # Wait for the post page to load
    try:
        img_element = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, "//img[contains(@class, 'x5yr21d')]"))
        )
        img_url = img_element.get_attribute("src")
        srcset = img_element.get_attribute("srcset")
        # srcset may be None; guard before slicing for the log line.
        print(f"Processing {post_url}: src={img_url[:50]}..., srcset={(srcset or '')[:50]}...")

        if srcset:
            try:
                best_url, max_width = _best_srcset_entry(srcset)
                if best_url:
                    img_url = best_url
                    print(f"Using srcset URL with width {max_width}")
            except Exception as e:
                print(f"Failed to parse srcset: {e}, falling back to src")

        filename = f"image_{index + 1}.jpg"
        filepath = os.path.join(SAVE_FOLDER, filename)
        # Stream the download; the timeout stops a hung request from stalling
        # the whole run, and the context manager releases the connection.
        with requests.get(img_url, stream=True, timeout=30) as response:
            if response.status_code == 200:
                with open(filepath, "wb") as f:
                    for chunk in response.iter_content(1024):
                        f.write(chunk)
                print(f"Downloaded {filename} from {post_url}")
            else:
                print(f"Failed to download {filename} from {post_url} "
                      f"(status: {response.status_code})")
    except Exception as e:
        print(f"Error processing {post_url}: {e}")
def main():
    """Log in, collect post URLs from TARGET_PROFILE, and download each image.

    Always quits the shared driver on exit, success or failure.
    """
    try:
        print("Logging in to Instagram...")
        login_to_instagram()

        # Dismiss the "Save your login info?" prompt if it appears.
        try:
            not_now_button = WebDriverWait(driver, 5).until(
                EC.element_to_be_clickable((By.XPATH, "//button[text()='Not Now']"))
            )
            not_now_button.click()
        except Exception:  # narrowed from bare except: keep Ctrl-C working
            print("No 'Not Now' prompt found, proceeding...")

        print(f"Collecting post URLs from {TARGET_PROFILE}...")
        post_urls = get_post_urls()

        print(f"Downloading full-size images from {len(post_urls)} posts...")
        for i, url in enumerate(post_urls):
            download_full_size_image(url, i)
    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        driver.quit()
        print("Browser closed.")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment