Instagram profile saver
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import requests
import os

# Configuration
USERNAME = ""  # Replace with your Instagram username
PASSWORD = ""  # Replace with your Instagram password
TARGET_PROFILE = ""  # Replace with the profile you want to download from
SAVE_FOLDER = ""  # Replace with your desired folder path
GECKODRIVER_PATH = ""  # Replace with your geckodriver path

# Ensure save folder exists
if not os.path.exists(SAVE_FOLDER):
    os.makedirs(SAVE_FOLDER)

# Set up Firefox WebDriver
service = Service(GECKODRIVER_PATH)
driver = webdriver.Firefox(service=service)

def login_to_instagram():
    driver.get("https://www.instagram.com/accounts/login/")
    time.sleep(3)  # Wait for page to load

    # Enter username
    username_field = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.NAME, "username"))
    )
    username_field.send_keys(USERNAME)

    # Enter password
    password_field = driver.find_element(By.NAME, "password")
    password_field.send_keys(PASSWORD)

    # Click login button
    login_button = driver.find_element(By.XPATH, "//button[@type='submit']")
    login_button.click()

    # Wait for login to complete
    time.sleep(5)

def get_post_urls():
    driver.get(f"https://www.instagram.com/{TARGET_PROFILE}/")
    time.sleep(3)  # Wait for profile to load

    # Set to store unique post URLs
    post_urls = set()

    # Get the total height of the page
    total_height = driver.execute_script("return document.body.scrollHeight")
    scroll_position = 0
    scroll_increment = 500  # Scroll by 500 pixels at a time (adjustable)

    while scroll_position < total_height:
        # Find all post links currently loaded
        post_elements = driver.find_elements(By.XPATH, "//a[@href]")
        for elem in post_elements:
            href = elem.get_attribute("href")
            if href and "/p/" in href:  # Filter for post URLs
                post_urls.add(href)

        # Scroll incrementally
        driver.execute_script(f"window.scrollTo(0, {scroll_position + scroll_increment});")
        time.sleep(0.5)  # Short delay to mimic human scrolling and allow loading

        # Update scroll position and total height (in case new content loads)
        scroll_position += scroll_increment
        time.sleep(2)
        total_height = driver.execute_script("return document.body.scrollHeight")

    print(f"Found {len(post_urls)} unique posts.")
    return list(post_urls)

def download_full_size_image(post_url, index):
    driver.get(post_url)
    time.sleep(2)  # Wait for post to load

    try:
        img_element = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, "//img[contains(@class, 'x5yr21d')]"))
        )
        img_url = img_element.get_attribute("src")
        srcset = img_element.get_attribute("srcset")
        print(f"Processing {post_url}: src={img_url[:50]}..., srcset={(srcset or '')[:50]}...")

        # Try to parse srcset for the highest resolution
        if srcset:
            try:
                # Split srcset into entries (e.g., "url 640w, url 1080w")
                sources = [s.strip() for s in srcset.split(",")]
                # Extract URL and width, assuming format "url width"
                best_url = None
                max_width = 0
                for source in sources:
                    parts = source.split(" ")
                    if len(parts) >= 2 and parts[-1].endswith("w"):
                        width = int(parts[-1].replace("w", ""))
                        url = " ".join(parts[:-1])
                        if width > max_width:
                            max_width = width
                            best_url = url
                if best_url:
                    img_url = best_url
                    print(f"Using srcset URL with width {max_width}")
            except Exception as e:
                print(f"Failed to parse srcset: {e}, falling back to src")

        # Download the image
        filename = f"image_{index+1}.jpg"
        filepath = os.path.join(SAVE_FOLDER, filename)
        response = requests.get(img_url, stream=True)
        if response.status_code == 200:
            with open(filepath, 'wb') as f:
                for chunk in response.iter_content(1024):
                    f.write(chunk)
            print(f"Downloaded {filename} from {post_url}")
        else:
            print(f"Failed to download {filename} from {post_url} (status: {response.status_code})")
    except Exception as e:
        print(f"Error processing {post_url}: {e}")

def main():
    try:
        print("Logging in to Instagram...")
        login_to_instagram()

        # Dismiss the 'Not Now' prompt if one appears after login
        try:
            not_now_button = WebDriverWait(driver, 5).until(
                EC.element_to_be_clickable((By.XPATH, "//button[text()='Not Now']"))
            )
            not_now_button.click()
        except Exception:
            print("No 'Not Now' prompt found, proceeding...")

        print(f"Collecting post URLs from {TARGET_PROFILE}...")
        post_urls = get_post_urls()

        print(f"Downloading full-size images from {len(post_urls)} posts...")
        for i, url in enumerate(post_urls):
            download_full_size_image(url, i)
    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        driver.quit()
        print("Browser closed.")


if __name__ == "__main__":
    main()
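
For unattended runs, the same script can drive Firefox without a visible window. Below is a minimal sketch of an alternative driver setup, assuming Selenium 4 and reusing the GECKODRIVER_PATH constant from the configuration section; only the driver creation changes, the rest of the script stays as written.

from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.service import Service

# Launch Firefox in headless mode; the scraping logic is unchanged.
options = Options()
options.add_argument("-headless")

service = Service(GECKODRIVER_PATH)  # same geckodriver path as above
driver = webdriver.Firefox(service=service, options=options)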