ChronoMonochrome · February 17, 2024 15:00
diff --git a/fb_scrape.py b/fb_scrape.py
 import argparse
 import code
 import errno
 import getpass
 import os
 import re
 import time
 from io import BytesIO
 from PIL import Image
 from base64 import b64decode
 from selenium import webdriver
 from selenium.common.exceptions import WebDriverException
 from selenium.webdriver.chrome.service import Service
 from selenium.webdriver.common.action_chains import ActionChains
 from selenium.webdriver.common.keys import Keys
 from selenium.webdriver.common.by import By

 def main_loop(driver, actions, username, limit):
    """Save the photos shown in the open photo viewer into a directory.

    Each iteration locates the viewer's <img> element, redraws it onto a
    canvas inside the page to obtain base64 PNG data, writes the decoded
    image below the ``username`` directory and then sends the RIGHT arrow
    key (expected to advance the viewer to the next photo).  The loop ends
    when the first photo's URL is seen again or when ``limit`` photos have
    been processed.

    Args:
        driver: Selenium WebDriver whose current page shows the photo viewer.
        actions: ActionChains object used to send the RIGHT key.
        username: Output directory name; created if it does not exist.
        limit: Maximum number of photos to process; values <= 0 mean no limit.

    Raises:
        OSError: If the output directory cannot be created, including the
            case where ``username`` exists but is not a directory.

    Errors raised while locating the image element are not caught here;
    errors while grabbing or saving a single image are printed and skipped.
    """
    # exist_ok=True tolerates an existing *directory* only.  Swallowing EEXIST
    # by hand also accepted a regular file of that name, after which every
    # save in the loop would fail.
    os.makedirs(username, exist_ok=True)

    image_xpath = '//img[@data-visualcompletion="media-vc-image"]'

    # Loop-invariant page script: draws the viewer image onto a canvas and
    # returns its PNG data as base64 without the "data:image/...;base64," prefix.
    grab_png_script = r'''
                    function getElementByXpath(path) {
                      return document.evaluate(
                        path,
                        document,
                        null,
                        XPathResult.FIRST_ORDERED_NODE_TYPE,
                        null,
                      ).singleNodeValue;
                    }

                    var img = getElementByXpath('//img[@data-visualcompletion="media-vc-image"]');
                    var canvas = document.createElement("canvas");
                    canvas.width = img.naturalWidth;
                    canvas.height = img.naturalHeight;
                    var ctx = canvas.getContext("2d");
                    ctx.drawImage(
                      img,
                      0,
                      0,
                      img.naturalWidth,
                      img.naturalHeight,
                      0,
                      0,
                      img.naturalWidth,
                      img.naturalHeight,
                    );
                    var dataURL = canvas.toDataURL("image/png");
                    return dataURL.replace(/^data:image\/(png|jpg);base64,/, "");
            '''

    first_img_url = ""
    processed = 0

    while True:
        img_div = driver.find_element('xpath', image_xpath)
        img_src = img_div.get_attribute('src')

        # Remember the first URL so that seeing it again stops the loop.
        if processed == 0:
            first_img_url = img_src
        elif img_src == first_img_url:
            print("Encountered already visited photo, exiting")
            break

        # File name: URL basename without the query string, ".jpg" -> ".png".
        img_filename = os.path.join(username, os.path.basename(img_src).split('?')[0].replace('.jpg', '.png'))
        try:
            # NOTE(review): presumably set so the canvas is not tainted and
            # toDataURL() is permitted - confirm.
            driver.execute_script("arguments[0].setAttribute('crossorigin',arguments[1])", img_div, 'anonymous')
            b64img = driver.execute_script(grab_png_script)
            image = Image.open(BytesIO(b64decode(b64img)))
            image.save(img_filename)
            print(f"Saved image as {img_filename}")
        except Exception as e:
            # Best effort: report the failure and move on to the next photo.
            print(f"Error saving image: {e}")
        actions.send_keys(Keys.RIGHT)
        actions.perform()
        time.sleep(3)

        # Failed photos count towards the limit as well.
        processed += 1
        if limit > 0 and processed >= limit:
            break
        
 def is_valid_username(username):
    """Check that ``username`` uses only letters, digits, ``_``, ``.`` and ``-``.

    Args:
        username: Candidate username string.

    Returns:
        A match object (truthy) when the whole string is valid, else ``None``.
    """
    # fullmatch, unlike re.match with ``^...$``, also rejects a trailing newline.
    return re.fullmatch(r"[a-zA-Z0-9_.-]+", username)

 def extract_username_from_url(url):
    """Extract the profile username from a Facebook URL or bare username.

    Args:
        url: Either ``http(s)://[www.]facebook.com/<username>`` (optionally
            followed by ``/photos_by`` and/or a trailing slash) or a plain
            username.

    Returns:
        The username for a recognised profile URL; ``None`` for any other
        ``http://`` / ``https://`` URL; otherwise ``url`` unchanged, on the
        assumption that the caller passed a username.
    """
    m = re.fullmatch(r"https?://(?:www\.)?facebook\.com/([a-zA-Z0-9_.-]+)(?:/photos_by)?/?", url)
    if m:
        return m.group(1)
    # Only a real scheme prefix marks the input as a URL; a bare "http" prefix
    # test would also reject usernames that merely start with those letters.
    if re.match(r"https?://", url):
        return None
    return url

 def construct_profile_url(input_str):
    """Build the ``/photos_by`` page URL for a profile URL or username.

    Args:
        input_str: A Facebook profile URL or a bare username.

    Returns:
        ``https://www.facebook.com/<username>/photos_by``.

    Raises:
        SystemExit: With status 1 when ``extract_username_from_url`` returns
            ``None`` for ``input_str``.
    """
    username = extract_username_from_url(input_str)

    if username is None:
        print("Invalid URL format. Please provide a valid Facebook profile URL or username.")
        # Raise SystemExit directly: the ``exit`` builtin is injected by the
        # ``site`` module and is not guaranteed to exist (e.g. ``python -S``).
        raise SystemExit(1)

    if not is_valid_username(username):
        # Only a warning: the URL is still built from the unchecked name.
        print("Warning: invalid username format detected. Username should only contain letters, digits, underscores, dots, and dashes.")

    return f"https://www.facebook.com/{username}/photos_by"

 if __name__ == "__main__":
    # Script entry point: parse the options, open the profile's photo page in
    # Chromium, click the first image with a non-empty alt text and hand over
    # to main_loop().  On any error an interactive console is opened so the
    # live state can be inspected; the browser is shut down afterwards.
    parser = argparse.ArgumentParser(description="Construct Facebook profile URL")
    parser.add_argument("-b", "--binary-path", type=str, default="/usr/bin/chromium", help="Path to chromium executable")
    parser.add_argument("-d", "--driver-path", type=str, default="/usr/local/bin/chromedriver", help="Path to chromedriver executable")
    parser.add_argument("-u", "--url", required=True, type=str, help="Facebook profile URL or username")
    parser.add_argument("-l", "--limit", type=int, default=-1, help="The maximum number of photos to download. If -1 (default), download all photos.")

    args = parser.parse_args()
    constructed_url = construct_profile_url(args.url)

    # Pre-bound so the cleanup in ``finally`` works even if start-up fails early.
    service = None
    driver = None
    try:
        # Path to the ChromeDriver executable
        chrome_driver_path = args.driver_path

        # Setup the ChromeDriver service
        service = Service(chrome_driver_path)
        # NOTE(review): webdriver.Chrome(service=...) may start the service
        # itself in recent Selenium releases - confirm this call is needed.
        service.start()

        # Path to Chromium executable
        chrome_path = args.binary_path

        # Configure Chrome options
        chrome_options = webdriver.ChromeOptions()
        chrome_options.binary_location = chrome_path
        # Resolve the home directory instead of assuming /home/<user>, which is
        # wrong for root and for accounts with a custom home.
        user_data_dir = os.path.expanduser("~/.config/chromium")
        chrome_options.add_argument(f"--user-data-dir={user_data_dir}")
        chrome_options.add_argument("--profile-directory=Default")

        # Launch Chrome with the configured options
        driver = webdriver.Chrome(service=service, options=chrome_options)
        driver.get(constructed_url)
        time.sleep(3)

        input("Waiting to close the login popup. Press any key to continue.")

        actions = ActionChains(driver)
        imgs = driver.find_elements(By.TAG_NAME, "img")
        # Pick the first <img> whose alt attribute is not the empty string.
        first_img = None
        for img in imgs:
            if img.get_attribute("alt") != "":
                first_img = img
                break

        if not first_img:
            raise Exception("Error: no profile image found")

        first_img.click()
        time.sleep(1)
        main_loop(driver, actions, username=extract_username_from_url(args.url), limit=args.limit)
    except Exception as e:
        # WebDriverException derives from Exception, so one clause covers both.
        print(f"An error occurred: {e}")
        # Open an interactive console on the current names for debugging.
        code.interact(local=locals())
    finally:
        # Shut the browser (or the bare driver service) down on every path.
        if driver is not None:
            driver.quit()
        elif service is not None:
            service.stop()
	import argparse
	import code
	import errno
	import getpass
	import os
	import re
	import time
	from io import BytesIO
	from PIL import Image
	from base64 import b64decode
	from selenium import webdriver
	from selenium.common.exceptions import WebDriverException
	from selenium.webdriver.chrome.service import Service
	from selenium.webdriver.common.action_chains import ActionChains
	from selenium.webdriver.common.keys import Keys
	from selenium.webdriver.common.by import By

	def main_loop(driver, actions, username, limit):
	    """Save the photos shown in the open photo viewer into a directory.

	    Each iteration locates the viewer's <img> element, redraws it onto a
	    canvas inside the page to obtain base64 PNG data, writes the decoded
	    image below the ``username`` directory and then sends the RIGHT arrow
	    key (expected to advance the viewer to the next photo).  The loop ends
	    when the first photo's URL is seen again or when ``limit`` photos have
	    been processed.

	    Args:
	        driver: Selenium WebDriver whose current page shows the photo viewer.
	        actions: ActionChains object used to send the RIGHT key.
	        username: Output directory name; created if it does not exist.
	        limit: Maximum number of photos to process; values <= 0 mean no limit.

	    Raises:
	        OSError: If the output directory cannot be created, including the
	            case where ``username`` exists but is not a directory.

	    Errors raised while locating the image element are not caught here;
	    errors while grabbing or saving a single image are printed and skipped.
	    """
	    # exist_ok=True tolerates an existing *directory* only.  Swallowing EEXIST
	    # by hand also accepted a regular file of that name, after which every
	    # save in the loop would fail.
	    os.makedirs(username, exist_ok=True)

	    image_xpath = '//img[@data-visualcompletion="media-vc-image"]'

	    # Loop-invariant page script: draws the viewer image onto a canvas and
	    # returns its PNG data as base64 without the "data:image/...;base64,"
	    # prefix.  The alternation must be a plain "|": an escaped "\|" matches a
	    # literal pipe, so the prefix would never be stripped.
	    grab_png_script = r'''
	        function getElementByXpath(path) {
	          return document.evaluate(
	            path,
	            document,
	            null,
	            XPathResult.FIRST_ORDERED_NODE_TYPE,
	            null,
	          ).singleNodeValue;
	        }

	        var img = getElementByXpath('//img[@data-visualcompletion="media-vc-image"]');
	        var canvas = document.createElement("canvas");
	        canvas.width = img.naturalWidth;
	        canvas.height = img.naturalHeight;
	        var ctx = canvas.getContext("2d");
	        ctx.drawImage(
	          img,
	          0,
	          0,
	          img.naturalWidth,
	          img.naturalHeight,
	          0,
	          0,
	          img.naturalWidth,
	          img.naturalHeight,
	        );
	        var dataURL = canvas.toDataURL("image/png");
	        return dataURL.replace(/^data:image\/(png|jpg);base64,/, "");
	    '''

	    first_img_url = ""
	    processed = 0

	    while True:
	        img_div = driver.find_element('xpath', image_xpath)
	        img_src = img_div.get_attribute('src')

	        # Remember the first URL so that seeing it again stops the loop.
	        if processed == 0:
	            first_img_url = img_src
	        elif img_src == first_img_url:
	            print("Encountered already visited photo, exiting")
	            break

	        # File name: URL basename without the query string, ".jpg" -> ".png".
	        img_filename = os.path.join(username, os.path.basename(img_src).split('?')[0].replace('.jpg', '.png'))
	        try:
	            # NOTE(review): presumably set so the canvas is not tainted and
	            # toDataURL() is permitted - confirm.
	            driver.execute_script("arguments[0].setAttribute('crossorigin',arguments[1])", img_div, 'anonymous')
	            b64img = driver.execute_script(grab_png_script)
	            image = Image.open(BytesIO(b64decode(b64img)))
	            image.save(img_filename)
	            print(f"Saved image as {img_filename}")
	        except Exception as e:
	            # Best effort: report the failure and move on to the next photo.
	            print(f"Error saving image: {e}")
	        actions.send_keys(Keys.RIGHT)
	        actions.perform()
	        time.sleep(3)

	        # Failed photos count towards the limit as well.
	        processed += 1
	        if limit > 0 and processed >= limit:
	            break

	def is_valid_username(username):
	    """Check that ``username`` uses only letters, digits, ``_``, ``.`` and ``-``.

	    Args:
	        username: Candidate username string.

	    Returns:
	        A match object (truthy) when the whole string is valid, else ``None``.
	    """
	    # fullmatch, unlike re.match with ``^...$``, also rejects a trailing newline.
	    return re.fullmatch(r"[a-zA-Z0-9_.-]+", username)

	def extract_username_from_url(url):
	    """Extract the profile username from a Facebook URL or bare username.

	    Args:
	        url: Either ``http(s)://[www.]facebook.com/<username>`` (optionally
	            followed by ``/photos_by`` and/or a trailing slash) or a plain
	            username.

	    Returns:
	        The username for a recognised profile URL; ``None`` for any other
	        ``http://`` / ``https://`` URL; otherwise ``url`` unchanged, on the
	        assumption that the caller passed a username.
	    """
	    m = re.fullmatch(r"https?://(?:www\.)?facebook\.com/([a-zA-Z0-9_.-]+)(?:/photos_by)?/?", url)
	    if m:
	        return m.group(1)
	    # Only a real scheme prefix marks the input as a URL; a bare "http" prefix
	    # test would also reject usernames that merely start with those letters.
	    if re.match(r"https?://", url):
	        return None
	    return url

	def construct_profile_url(input_str):
	    """Build the ``/photos_by`` page URL for a profile URL or username.

	    Args:
	        input_str: A Facebook profile URL or a bare username.

	    Returns:
	        ``https://www.facebook.com/<username>/photos_by``.

	    Raises:
	        SystemExit: With status 1 when ``extract_username_from_url`` returns
	            ``None`` for ``input_str``.
	    """
	    username = extract_username_from_url(input_str)

	    if username is None:
	        print("Invalid URL format. Please provide a valid Facebook profile URL or username.")
	        # Raise SystemExit directly: the ``exit`` builtin is injected by the
	        # ``site`` module and is not guaranteed to exist (e.g. ``python -S``).
	        raise SystemExit(1)

	    if not is_valid_username(username):
	        # Only a warning: the URL is still built from the unchecked name.
	        print("Warning: invalid username format detected. Username should only contain letters, digits, underscores, dots, and dashes.")

	    return f"https://www.facebook.com/{username}/photos_by"

	if __name__ == "__main__":
	    # Script entry point: parse the options, open the profile's photo page in
	    # Chromium, click the first image with a non-empty alt text and hand over
	    # to main_loop().  On any error an interactive console is opened so the
	    # live state can be inspected; the browser is shut down afterwards.
	    parser = argparse.ArgumentParser(description="Construct Facebook profile URL")
	    parser.add_argument("-b", "--binary-path", type=str, default="/usr/bin/chromium", help="Path to chromium executable")
	    parser.add_argument("-d", "--driver-path", type=str, default="/usr/local/bin/chromedriver", help="Path to chromedriver executable")
	    parser.add_argument("-u", "--url", required=True, type=str, help="Facebook profile URL or username")
	    parser.add_argument("-l", "--limit", type=int, default=-1, help="The maximum number of photos to download. If -1 (default), download all photos.")

	    args = parser.parse_args()
	    constructed_url = construct_profile_url(args.url)

	    # Pre-bound so the cleanup in ``finally`` works even if start-up fails early.
	    service = None
	    driver = None
	    try:
	        # Path to the ChromeDriver executable
	        chrome_driver_path = args.driver_path

	        # Setup the ChromeDriver service
	        service = Service(chrome_driver_path)
	        # NOTE(review): webdriver.Chrome(service=...) may start the service
	        # itself in recent Selenium releases - confirm this call is needed.
	        service.start()

	        # Path to Chromium executable
	        chrome_path = args.binary_path

	        # Configure Chrome options
	        chrome_options = webdriver.ChromeOptions()
	        chrome_options.binary_location = chrome_path
	        # Resolve the home directory instead of assuming /home/<user>, which is
	        # wrong for root and for accounts with a custom home.
	        user_data_dir = os.path.expanduser("~/.config/chromium")
	        chrome_options.add_argument(f"--user-data-dir={user_data_dir}")
	        chrome_options.add_argument("--profile-directory=Default")

	        # Launch Chrome with the configured options
	        driver = webdriver.Chrome(service=service, options=chrome_options)
	        driver.get(constructed_url)
	        time.sleep(3)

	        input("Waiting to close the login popup. Press any key to continue.")

	        actions = ActionChains(driver)
	        imgs = driver.find_elements(By.TAG_NAME, "img")
	        # Pick the first <img> whose alt attribute is not the empty string.
	        first_img = None
	        for img in imgs:
	            if img.get_attribute("alt") != "":
	                first_img = img
	                break

	        if not first_img:
	            raise Exception("Error: no profile image found")

	        first_img.click()
	        time.sleep(1)
	        main_loop(driver, actions, username=extract_username_from_url(args.url), limit=args.limit)
	    except Exception as e:
	        # WebDriverException derives from Exception, so one clause covers both.
	        print(f"An error occurred: {e}")
	        # Open an interactive console on the current names for debugging.
	        code.interact(local=locals())
	    finally:
	        # Shut the browser (or the bare driver service) down on every path.
	        if driver is not None:
	            driver.quit()
	        elif service is not None:
	            service.stop()
No results found