Created
June 16, 2022 02:49
-
-
Save candyan/786bf1b1fcc81dcd34891d219d5c4dc0 to your computer and use it in GitHub Desktop.
playwright nox
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os

from playwright.sync_api import sync_playwright
class NoxInfluencerCrawler(object):
    """Collect influencer names and social links from noxinfluencer.com.

    The crawler drives a visible (non-headless) Chromium instance through
    Playwright's synchronous API: it logs in once, then visits each detail
    page and reads the fields it can find.
    """

    LOGIN_URL = "https://www.noxinfluencer.com/login?userType=brand&service=https%3A%2F%2Fwww.noxinfluencer.com%2F"

    # NOTE(review): credentials should not live in source control. These
    # literals are kept only so existing callers behave as before; prefer the
    # ``email``/``password`` arguments or the NOX_EMAIL / NOX_PASSWORD
    # environment variables. The e-mail value looks like an obfuscation
    # placeholder rather than a real address -- confirm before relying on it.
    DEFAULT_EMAIL = "[email protected]"
    DEFAULT_PASSWORD = "Q!w2e3r4"

    # Selector for the displayed name on a detail page.
    _NAME_SELECTOR = ".info-block .title span"

    # Social-link selectors, in the order their hrefs appear in each result
    # tuple: YouTube, Instagram, Facebook, Twitter, TikTok.
    _SOCIAL_SELECTORS = (
        "[sensors-name='social_ytb']",
        "[sensors-name='social_ins']",
        "[sensors-name='social_fb']",
        "[sensors-name='social_tw']",
        "[sensors-name='social_tt']",
    )

    def crawl_details(self, detail_urls, email=None, password=None):
        """Log in and scrape every URL in ``detail_urls``.

        Args:
            detail_urls: Iterable of detail-page URLs to visit.
            email: Login e-mail. Falls back to the ``NOX_EMAIL`` environment
                variable, then to ``DEFAULT_EMAIL``.
            password: Login password. Falls back to the ``NOX_PASSWORD``
                environment variable, then to ``DEFAULT_PASSWORD``.

        Returns:
            A list with one tuple per page that loaded:
            ``(name, youtube_url, instagram_url, fb_url, tw_url, tt_url)``.
            A field is ``None`` when its element could not be read. URLs
            whose navigation raised are printed and skipped.
        """
        email = email or os.environ.get("NOX_EMAIL", self.DEFAULT_EMAIL)
        password = password or os.environ.get("NOX_PASSWORD", self.DEFAULT_PASSWORD)

        results = []
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=False)
            try:
                context = browser.new_context()
                page = context.new_page()
                page.set_default_navigation_timeout(60 * 1000)

                self._login(page, email, password)

                # cookies/storage_state are methods; they must be called,
                # otherwise only the bound-method repr is printed.
                print(context.cookies())
                print(context.storage_state())

                for detail_url in detail_urls:
                    try:
                        page.goto(detail_url)
                        # Fixed pause after navigation before reading fields.
                        page.wait_for_timeout(10 * 1000)
                    except Exception as e:
                        print(e)
                        print(detail_url)
                        continue

                    name = self._read_optional(page, self._NAME_SELECTOR)
                    links = [
                        self._read_optional(page, selector, "href")
                        for selector in self._SOCIAL_SELECTORS
                    ]
                    results.append((name, *links))
            finally:
                # Release the browser even when login or scraping raises.
                browser.close()

        if not results:
            print("error")
        return results

    def _login(self, page, email, password):
        """Submit the login form on ``page`` and wait for the search page."""
        page.goto(self.LOGIN_URL)
        page.locator("#email").focus()
        page.keyboard.insert_text(email)
        page.locator("#pwd").focus()
        page.keyboard.insert_text(password)
        page.locator("#login-submit").click()
        # NOTE(review): in Playwright URL globs a single `*` does not cross
        # `/`; confirm this pattern really matches the full post-login URL.
        page.wait_for_url("*noxinfluencer.com/youtube/search", timeout=60 * 1000)
        page.wait_for_timeout(5 * 1000)

    @staticmethod
    def _read_optional(page, selector, attribute=None, timeout=500):
        """Return the text (or ``attribute``) of ``selector``, or ``None``.

        Best-effort read: any failure to find or read the element yields
        ``None`` instead of raising. Only ``Exception`` is caught, so
        KeyboardInterrupt and SystemExit still propagate.
        """
        try:
            element = page.wait_for_selector(selector, timeout=timeout)
            if not element:
                return None
            if attribute is None:
                return element.text_content()
            return element.get_attribute(attribute)
        except Exception:
            return None

    def demo(self):
        """Open one sample channel page in a visible Chromium window."""
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=False)
            try:
                page = browser.new_page()
                page.goto("https://www.noxinfluencer.com/youtube/channel/UCPFhnt7T8iaBLp1wIfFRzFw")
            finally:
                browser.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment