-
-
Save JuryA/1f7738bf37806837f13378f0d2fe4bd2 to your computer and use it in GitHub Desktop.
LinkedIn scraper to get contact information for a company's current or former employees
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns scraper.linkedin | |
"This is meant to be a very simple sktech on how to scrap some LinkedIn network with etaoin, enlive, and Google Chrome. This is by no mean the more efficient, or elegant way to do so." | |
(:require [scraper.utils :refer [email password some-company-id]] | |
[net.cgrand.enlive-html :as html] | |
[etaoin.api :refer :all] | |
[clojure.string :as str])) | |
;; Browser driver used by the functions in this namespace. `defonce` keeps the
;; already-bound value when the namespace is reloaded, so `(chrome)` is only
;; evaluated the first time this form is loaded.
;; NOTE(review): `chrome` presumably comes from `etaoin.api` (referred with
;; `:all`) and starts a Google Chrome session -- confirm.
(defonce driver (chrome)) | |
(defn former-company-employees
  "Returns the LinkedIn people-search URL whose `facetPastCompany` facet is
  set to `company-id`. The `facetGeoRegion` facet is fixed to \"fr:5227\".
  `company-id` is inserted verbatim (through `str`), without URL encoding."
  [company-id]
  (let [before-id "https://www.linkedin.com/search/results/people/?facetGeoRegion=%5B%22fr%3A5227%22%5D&facetPastCompany=%5B%22"
        after-id  "%22%5D&origin=FACETED_SEARCH"]
    (str before-id company-id after-id)))
(defn current-company-employees
  "Returns the LinkedIn people-search URL whose `facetCurrentCompany` facet is
  set to `company-id`. The `facetGeoRegion` facet is fixed to \"fr:5227\".
  `company-id` is inserted verbatim (through `str`), without URL encoding."
  [company-id]
  (let [before-id "https://www.linkedin.com/search/results/people/?facetCurrentCompany=%5B%22"
        after-id  "%22%5D&facetGeoRegion=%5B%22fr%3A5227%22%5D&origin=FACETED_SEARCH"]
    (str before-id company-id after-id)))
(defn contact-list
  "Returns a lazy sequence of the search-result nodes found on the results page
  currently shown by `driver`, followed by those of the pages reached by
  clicking the \"Next\" button.

  Each call first scrolls down and waits, then:
  - if the page contains the text \"Next\", the page HTML is parsed and the
    nodes whose `data-test-search-result` attribute contains \"PROFILE\" and
    that have more than one child in `:content` are kept; the \"Next\" button
    is then clicked and the listing continues with the following page;
  - if the page contains \"No results found.\", returns nil;
  - otherwise the page is polled again.

  NOTE(review): no stop condition for the last results page is visible here;
  if that page still contains the text \"Next\" the sequence may never end --
  confirm against the live page."
  [driver]
  (scroll-down driver 2e4)
  (wait driver 1)
  (cond
    (has-text? driver "Next")
    (lazy-seq
      ;; `concat` is a function, so its arguments are evaluated left to right:
      ;; the current page's HTML is captured before \"Next\" is clicked.
      (concat (filter #(< 1 (count (:content %)))
                      (-> (html/html-snippet (get-element-inner-html driver {:tag "html"}))
                          (html/select [(html/attr-contains :data-test-search-result "PROFILE")])))
              (do (wait driver 1)
                  (scroll-down driver 2e4)
                  (click-visible driver {:tag :button :aria-label "Next"})
                  (contact-list driver))))

    (has-text? driver "No results found.")
    nil

    ;; Neither marker is present yet: poll again. `recur` keeps the stack flat,
    ;; whereas a plain self-call would add one frame per poll.
    :else
    (recur driver)))
(defn name-and-link
  "Builds a profile map from one search-result node `item`:
  - `:technical/raw-result-li`   the node itself, untouched;
  - `:profile/name`              first child of the first element whose class
                                 ends with \"actor-name\";
  - `:profile/url`               \"https://www.linkedin.com\" followed by the
                                 `href` of the first element whose
                                 `data-control-name` contains
                                 \"search_srp_result\";
  - `:profile/short-description` first child of the first node matched by
                                 `[:div :p (html/nth-child 1)]`.
  Missing elements yield nil values (or the bare host for the URL)."
  [item]
  (let [first-match (fn [selector] (first (html/select item selector)))
        name-node   (first-match [(html/attr-ends :class "actor-name")])
        link-node   (first-match [(html/attr-contains :data-control-name "search_srp_result")])
        blurb-node  (first-match [:div :p (html/nth-child 1)])]
    {:technical/raw-result-li   item
     :profile/name              (-> name-node :content first)
     :profile/url               (str "https://www.linkedin.com" (-> link-node :attrs :href))
     :profile/short-description (-> blurb-node :content first)}))
(defn enrich-profile
  "Visits the page at `(:profile/url item)` with the namespace-level `driver`
  and returns `item` extended with:
  - `:profile/contact-infos` the `href` of every `a` element found under an
    element whose class contains \"pv-contact-info__ci-container\", read after
    the \"contact_see_more\" control has been clicked;
  - `:technical/raw-profile` the parsed HTML of the page, captured after the
    element labelled \"Dismiss\" has been clicked and the page scrolled down."
  [item]
  (go driver (str (:profile/url item)))
  (wait-visible driver {:class "core-rail"})
  ;; The control is located in the parsed HTML and then clicked by its id.
  (let [rail-nodes (html/html-snippet (get-element-inner-html driver {:class "core-rail"}))
        see-more   (first (html/select rail-nodes
                                       [(html/attr-contains :data-control-name "contact_see_more")]))]
    (click driver {:id (-> see-more :attrs :id)}))
  (wait driver 0.5)
  (let [info-nodes    (html/html-snippet
                        (get-element-inner-html driver {:class "pv-profile-section__section-info section-info"}))
        links         (html/select info-nodes
                                   [(html/attr-contains :class "pv-contact-info__ci-container") :a])
        contact-infos (map #(-> % :attrs :href) links)]
    (click driver {:aria-label "Dismiss"})
    (wait driver 0.5)
    (scroll-down driver 2e4)
    (let [page-nodes (html/html-snippet (get-element-inner-html driver {:tag "html"}))]
      (assoc item
             :profile/contact-infos contact-infos
             :technical/raw-profile page-nodes))))
(defn- tsv-field
  "Renders `value` as text that fits in one tab-separated cell: every run of
  whitespace (tabs and line breaks included) becomes a single space and the
  result is trimmed. nil becomes the empty string."
  [value]
  (-> (str value)
      (str/replace #"\s+" " ")
      str/trim))

(defn export-profiles
  "Serialises `profiles` as tab-separated text, one line per profile, with the
  columns name, short description and contact infos (the latter joined with
  \", \"). Returns the empty string for an empty collection.

  Field values are whitespace-normalised first, because a tab or line break
  inside a scraped value would otherwise split the row or shift its columns."
  [profiles]
  (->> profiles
       (map (fn [profile]
              (str/join "\t"
                        [(tsv-field (:profile/name profile))
                         (tsv-field (:profile/short-description profile))
                         (str/join ", " (map tsv-field (:profile/contact-infos profile)))])))
       (str/join "\n")))
(defn log-in-piotr-yuxuan!
  "Opens https://www.linkedin.com with the namespace-level `driver`, fills the
  `session_key` and `session_password` inputs with `email` and `password`
  (both referred from `scraper.utils`), then clicks the sign-in button."
  []
  ;; The window needs to be visible
  (doto driver
    (go "https://www.linkedin.com")
    (fill {:tag :input :name "session_key"} email)
    (fill {:tag :input :name "session_password"} password))
  ;; Kept outside `doto` so the function still returns the result of `click`.
  (click driver {:tag :button :aria-label "i18n_sign-in"}))
(defn- scrape-listing!
  "Opens `listing-url`, collects every search result reachable from it,
  enriches each one with its contact infos and writes the export to
  `output-file`."
  [listing-url output-file]
  ;; Navigate first: `contact-list` reads whatever page the driver is showing.
  (go driver listing-url)
  (let [;; Realise the whole listing before visiting any profile. The listing
        ;; paginates lazily with the same driver that `enrich-profile` sends
        ;; to other pages, so the two must not be interleaved.
        listed   (doall (map name-and-link (contact-list driver)))
        profiles (mapv enrich-profile listed)]
    (spit output-file (export-profiles profiles))))

(defn main!
  "Scrapes the former, then the current, employees of `some-company-id` and
  writes them to \"former-profiles.csv\" and \"current-profiles.csv\".
  You should be logged in before. Better to unwrap this function and run it manually."
  []
  (scrape-listing! (former-company-employees some-company-id) "former-profiles.csv")
  (scrape-listing! (current-company-employees some-company-id) "current-profiles.csv"))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment