Created
October 18, 2018 16:27
-
-
Save ioness/f548552c2a29754b5a2085cf83ffb845 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from selenium.webdriver.support.ui import WebDriverWait | |
from selenium.webdriver.support import expected_conditions as EC | |
from selenium.webdriver.common.by import By | |
def data(driver, links):
    """Scrape every profile URL in *links* and return the rows in order.

    Each row is whatever ``get(driver, link)`` returns for that link.
    """
    return [get(driver, link) for link in links]
def get(driver, link):
    """Scrape one profile page into a seven-field row.

    Row layout: name, type and year, e-mail address, nationalities,
    school/program, degree, graduation.  A field that is not present on
    the page is left as an empty string.

    Args:
        driver: WebDriver-like object; ``get`` and
            ``find_elements_by_xpath`` are the only methods used on it.
        link: URL of the profile page to load.

    Returns:
        A list of seven strings, or ``[]`` when loading or parsing the
        page raised an exception (the failure is logged).
    """
    import logging

    # Label found in an "ico_other" item's text -> index of the row field
    # that receives the text of the item's <b> child.
    labelled_fields = (
        ('Nationalities:', 3),
        ('School/Program:', 4),
        ('Degree:', 5),
    )
    row = [''] * 7
    try:
        driver.get(link)

        # name: first line of the heading text
        found = driver.find_elements_by_xpath('//*[@id="maincol"]/div[1]/div[2]/div[2]/b')
        if found:
            row[0] = found[0].text.split('\n')[0]

        # type and year
        found = driver.find_elements_by_xpath('//*[@id="maincol"]/div[1]/div[2]/div[2]/b/span')
        if found:
            row[1] = found[0].text

        # e-mail: the part of the link's href that follows the first ':'
        found = driver.find_elements_by_xpath('//*[@id="personal_info"]//li[@class="ico_email"]/b/a')
        if found:
            row[2] = found[0].get_attribute('href').split(':')[1]

        # ico_other
        for item in driver.find_elements_by_xpath('//*[@id="personal_info"]//li[@class="ico_other"]'):
            item_text = item.text  # read once instead of once per label
            for label, index in labelled_fields:
                if label in item_text:
                    row[index] = item.find_element_by_xpath('./b').text
            if 'Graduation:' in item_text:
                # Join the first two <b> parts; a single part is kept
                # as-is instead of raising IndexError and losing the row.
                parts = item.find_elements_by_xpath('./b')[:2]
                if parts:
                    row[6] = ' '.join(part.text for part in parts)
    except Exception:
        # Best effort: one broken page must not abort the whole run, but
        # the failure should be visible rather than silently swallowed.
        logging.getLogger(__name__).exception('Could not scrape %s', link)
        return []
    return row
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from selenium import webdriver | |
def driver():
    """Create a Chrome WebDriver configured to open in incognito mode.

    Returns:
        The ``webdriver.Chrome`` instance, started from the chromedriver
        binary at ``/path/to/chromedriver``.
    """
    option = webdriver.ChromeOptions()
    # The switch must be two ASCII hyphens with no surrounding spaces.
    # The previous value used an em dash (' — incognito'), which is not a
    # valid command-line switch.
    option.add_argument('--incognito')
    # NOTE(review): `executable_path` / `chrome_options` are the Selenium 3
    # keyword names; confirm the installed Selenium version still accepts them.
    return webdriver.Chrome(executable_path='/path/to/chromedriver', chrome_options=option)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv | |
def exp(data, path='/foo/g19.csv'):
    """Write the scraped rows to a CSV file.

    Args:
        data: Iterable of rows, each row an iterable of field values.
        path: Destination file; defaults to the location that was
            previously hard-coded.  The file is overwritten.
    """
    # newline='' lets the csv writer control line endings itself (so the
    # requested '\n' terminator is not translated by the platform), and an
    # explicit encoding keeps non-ASCII field values from depending on the
    # locale default.
    with open(path, 'w', newline='', encoding='utf-8') as output:
        writer = csv.writer(output, lineterminator='\n')
        writer.writerows(data)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from selenium.webdriver.support.ui import WebDriverWait | |
from selenium.webdriver.support import expected_conditions as EC | |
from selenium.webdriver.common.by import By | |
from selenium.common.exceptions import NoSuchElementException | |
from selenium.common.exceptions import StaleElementReferenceException | |
import time | |
def list(driver, max_links=9000, step_timeout=1000):
    """Open the first directory entry and collect profile links from the modal.

    The first item of the directory is clicked to open the modal, then the
    modal's "next" button is pressed repeatedly, recording the profile link
    shown after each step.

    Args:
        driver: Selenium WebDriver positioned on the directory page.
        max_links: Upper bound on the number of links collected.
        step_timeout: Seconds to wait for the modal to show a link that
            differs from the previous one.

    Returns:
        The list of collected ``href`` values, in visiting order.  When the
        modal stops showing a new link within *step_timeout* seconds, or the
        "next" button cannot be found, collection stops and the links
        gathered so far are returned instead of being lost to an exception.
    """
    # Imported here because the module-level imports do not include it.
    from selenium.common.exceptions import TimeoutException

    first_entry = (By.XPATH, '//tbody[@id="divAllItems"]/div[1]/div/div[1]/div[1]/div[2]/h4/a')
    profile_link = (By.XPATH, '//*[@id="primary-modal"]/div/div/div/div[1]/div[2]/div[2]/div/a')

    # Open the modal from the first entry and wait for its profile link.
    WebDriverWait(driver, 20).until(EC.visibility_of_element_located(first_entry))
    driver.find_element(*first_entry).click()
    WebDriverWait(driver, 10000).until(EC.visibility_of_element_located(profile_link))
    driver.execute_script("document.getElementById('primary-modal').scrollTo(0, document.getElementById('primary-modal').scrollHeight)")

    ignored_exceptions = (NoSuchElementException, StaleElementReferenceException)
    links = []
    previous = None
    for _ in range(max_links):
        try:
            # Block until the modal shows a link different from the last one.
            WebDriverWait(driver, step_timeout, ignored_exceptions=ignored_exceptions).until(
                element_is_new(profile_link, previous)
            )
            current = driver.find_element(*profile_link).get_attribute('href')
        except (TimeoutException, NoSuchElementException):
            # No further profile appeared: treat as the end of the directory.
            break
        links.append(current)
        previous = current
        try:
            driver.find_element(By.XPATH, '//*[@id="profile-btn--next"]').click()
        except NoSuchElementException:
            break
    return links
class element_is_new(object):
    """Wait condition: the located element's ``href`` differs from a known one.

    Calling the instance with a driver returns the located element when its
    ``href`` attribute is not equal to the stored value, and ``False``
    otherwise.
    """

    def __init__(self, locator, href):
        self.locator, self.href = locator, href

    def __call__(self, driver):
        # Look the element up afresh on every poll.
        candidate = driver.find_element(*self.locator)
        current_href = candidate.get_attribute("href")
        return candidate if self.href != current_href else False
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from mod.driver import driver | |
from mod.pre import pre | |
from mod.list import list | |
from mod.scroll import scroll | |
from mod.data import data | |
from mod.exp import exp | |
# Pipeline: start the browser, log in, load the whole directory, collect the
# profile links, scrape each profile, and export the rows to CSV.
browser = None
try:
    # Distinct names keep the imported driver/list/data functions usable.
    browser = driver()
    pre(browser)
    scroll(browser)
    links = list(browser)
    rows = data(browser, links)
    exp(rows)
except Exception as e:
    print(e)
    print('Hello, exception')
finally:
    # Always shut the browser down so no Chrome/chromedriver process is
    # left running, whether the run succeeded or failed.
    if browser is not None:
        browser.quit()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from selenium.webdriver.common.by import By | |
from selenium.webdriver.support.ui import WebDriverWait | |
from selenium.webdriver.support import expected_conditions as EC | |
def pre(driver, username='usuario', password='contrasena'):
    """Log in through the site's login page, then open the target page.

    Args:
        driver: WebDriver-like object used to navigate and fill the form.
        username: Value typed into the ``userNameInput`` field.
        password: Value typed into the ``passwordInput`` field.

    The defaults are the values that were previously hard-coded, so
    existing ``pre(driver)`` calls behave as before.
    """
    driver.get('https://foo_url')
    # The landing page only links to the login form; follow that link.
    login_page_link = driver.find_element_by_xpath('//div[@class="ctnLogin"]/p[@class="center"]/a')
    driver.get(login_page_link.get_attribute('href'))

    driver.find_element_by_id('userNameInput').send_keys(username)
    driver.find_element_by_id('passwordInput').send_keys(password)
    driver.find_element_by_id('submitButton').submit()

    driver.get('https://foo_url2')
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import time | |
from selenium.webdriver.support.ui import WebDriverWait | |
from selenium.webdriver.support import expected_conditions as EC | |
from selenium.webdriver.common.by import By | |
def scroll(driver):
    """Scroll to the bottom repeatedly until the page height stops changing.

    After each scroll the function waits (up to 20 s) for the
    ``//div[@id="generic"]`` element's text content to differ from
    'Loading Directory...', then scrolls again and compares heights.  Once
    two consecutive heights are equal it scrolls back to the top and
    sleeps for five seconds.
    """
    to_bottom = "window.scrollTo(0, document.body.scrollHeight);return document.body.scrollHeight;"
    loader = (By.XPATH, '//div[@id="generic"]')

    previous_height = 0
    current_height = driver.execute_script(to_bottom)
    while previous_height != current_height:
        WebDriverWait(driver, 20).until(element_has_not_text(loader, 'Loading Directory...'))
        previous_height, current_height = current_height, driver.execute_script(to_bottom)

    driver.execute_script("window.scrollTo(0, 0);return 1;")
    time.sleep(5)
class element_has_not_text(object):
    """Wait condition: the located element's text content is not a given text.

    Calling the instance with a driver returns the located element when its
    ``textContent`` attribute is not equal to the stored text, and ``False``
    otherwise.
    """

    def __init__(self, locator, text):
        self.locator, self.text = locator, text

    def __call__(self, driver):
        # Look the element up afresh on every poll.
        candidate = driver.find_element(*self.locator)
        content = candidate.get_attribute("textContent")
        return candidate if self.text != content else False
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment