Skip to content

Instantly share code, notes, and snippets.

@JuryA
Forked from piotr-yuxuan/linkedin.clj
Created August 7, 2020 17:40
Show Gist options
  • Save JuryA/1f7738bf37806837f13378f0d2fe4bd2 to your computer and use it in GitHub Desktop.
Save JuryA/1f7738bf37806837f13378f0d2fe4bd2 to your computer and use it in GitHub Desktop.
LinkedIn scraper to get contact info for a company's current or former employees
(ns scraper.linkedin
"This is meant to be a very simple sketch of how to scrape a LinkedIn network with etaoin, enlive, and Google Chrome. This is by no means the most efficient or elegant way to do so."
(:require [scraper.utils :refer [email password some-company-id]]
[net.cgrand.enlive-html :as html]
[etaoin.api :refer :all]
[clojure.string :as str]))
;; Single browser driver shared by every function in this namespace.
;; `defonce` keeps the same value when the namespace is re-evaluated, so a
;; REPL reload does not call `(chrome)` a second time.
(defonce driver (chrome))
(defn former-company-employees
  "Builds the LinkedIn people-search URL whose `facetPastCompany` facet is set
  to `company-id`. The `facetGeoRegion` facet is fixed to `fr:5227`."
  [company-id]
  (let [url-head "https://www.linkedin.com/search/results/people/?facetGeoRegion=%5B%22fr%3A5227%22%5D&facetPastCompany=%5B%22"
        url-tail "%22%5D&origin=FACETED_SEARCH"]
    (str url-head company-id url-tail)))
(defn current-company-employees
  "Builds the LinkedIn people-search URL whose `facetCurrentCompany` facet is
  set to `company-id`. The `facetGeoRegion` facet is fixed to `fr:5227`."
  [company-id]
  (let [url-head "https://www.linkedin.com/search/results/people/?facetCurrentCompany=%5B%22"
        url-tail "%22%5D&facetGeoRegion=%5B%22fr%3A5227%22%5D&origin=FACETED_SEARCH"]
    (str url-head company-id url-tail)))
;; Returns the search-result nodes of the page currently shown by `driver`,
;; followed by the results of the following pages, reached by clicking the
;; "Next" button. Returns nil when the page shows "No results found.".
;;
;; The scroll, the wait and the text checks below run eagerly on every call;
;; only the body of `lazy-seq` (HTML capture, click on "Next", recursive call)
;; is deferred until the sequence is consumed.
;; NOTE(review): because of that laziness, the browser must still be on the
;; search results when the sequence is consumed -- callers that navigate the
;; same driver elsewhere should realize the sequence first (e.g. `doall`).
(defn contact-list [driver]
;; Scroll by a large offset before inspecting the page
;; (presumably so that the bottom of the results, pagination included, is rendered -- TODO confirm).
(scroll-down driver 2e4)
(wait driver 1)
(cond (has-text? driver "Next")
;; Keep only the result nodes having more than one child in :content.
(lazy-seq (concat (filter #(< 1 (count (:content %)))
(-> (html/html-snippet (get-element-inner-html driver {:tag "html"}))
(html/select [(html/attr-contains :data-test-search-result "PROFILE")])))
;; `concat` is a function, so this `do` runs as soon as the lazy-seq body
;; is entered: the next page is requested right after the current page's
;; HTML has been captured above.
(do (wait driver 1)
(scroll-down driver 2e4)
(click-visible driver {:tag :button :aria-label "Next"})
(contact-list driver))))
(has-text? driver "No results found.") nil
;; Neither text is present: poll again.
;; NOTE(review): this recursion is eager and unbounded; a page that never
;; shows either text keeps this call from returning.
:else (contact-list driver)))
(defn name-and-link
  "Turns one search-result node into a profile map holding the raw node, the
  first content item of the element whose class ends with \"actor-name\", the
  absolute profile URL built from the href of the element whose
  data-control-name contains \"search_srp_result\", and the first content item
  of the first node matched by the `:div :p (nth-child 1)` selector."
  [item]
  (let [pick       (fn [selector] (first (html/select item selector)))
        name-node  (pick [(html/attr-ends :class "actor-name")])
        link-node  (pick [(html/attr-contains :data-control-name "search_srp_result")])
        blurb-node (pick [:div :p (html/nth-child 1)])]
    {:technical/raw-result-li   item
     :profile/name              (-> name-node :content first)
     :profile/url               (str "https://www.linkedin.com" (-> link-node :attrs :href))
     :profile/short-description (-> blurb-node :content first)}))
(defn enrich-profile
  "Opens the profile page at `(:profile/url item)` with the shared `driver`,
  clicks the element whose data-control-name contains \"contact_see_more\",
  collects the hrefs of the links found in the contact-info section, dismisses
  that section, and returns `item` extended with `:profile/contact-infos` and
  `:technical/raw-profile` (the parsed HTML of the whole page)."
  [item]
  (go driver (str (:profile/url item)))
  (wait-visible driver {:class "core-rail"})
  ;; Find the id of the \"see more\" control in the captured markup, then click it.
  (let [rail-nodes (html/html-snippet (get-element-inner-html driver {:class "core-rail"}))
        see-more   (first (html/select rail-nodes [(html/attr-contains :data-control-name "contact_see_more")]))]
    (click driver {:id (-> see-more :attrs :id)}))
  (wait driver 0.5)
  (let [info-nodes    (html/html-snippet (get-element-inner-html driver {:class "pv-profile-section__section-info section-info"}))
        link-nodes    (html/select info-nodes [(html/attr-contains :class "pv-contact-info__ci-container") :a])
        contact-infos (map #(-> % :attrs :href) link-nodes)]
    (click driver {:aria-label "Dismiss"})
    (wait driver 0.5)
    (scroll-down driver 2e4)
    ;; The full page is captured last, after the dismissal and the scroll.
    (let [page-nodes (html/html-snippet (get-element-inner-html driver {:tag "html"}))]
      (assoc item
             :profile/contact-infos contact-infos
             :technical/raw-profile page-nodes))))
(defn export-profiles
  "Renders `profiles` as text: one line per profile, with the name, the short
  description and the comma-separated contact infos joined by tab characters.
  Lines are joined by newlines; an empty collection gives an empty string."
  [profiles]
  (let [contact-cell (fn [p] (str/join ", " (:profile/contact-infos p)))
        row-cells    (juxt :profile/name :profile/short-description contact-cell)]
    (->> profiles
         (map #(str/join "\t" (row-cells %)))
         (str/join "\n"))))
(defn log-in-piotr-yuxuan!
  "Opens the LinkedIn home page with the shared `driver`, fills the
  `session_key` and `session_password` inputs with `email` and `password`,
  then clicks the sign-in button."
  []
  (let [key-input      {:tag :input :name "session_key"}
        password-input {:tag :input :name "session_password"}
        sign-in-button {:tag :button :aria-label "i18n_sign-in"}]
    (go driver "https://www.linkedin.com")
    ;; The window needs to be visible
    (fill driver key-input email)
    (fill driver password-input password)
    (click driver sign-in-button)))
(defn- scrape-to-file!
  "Opens `search-url`, collects every search result, enriches each one by
  visiting its profile page, and writes the tab-separated export to `file-name`."
  [search-url file-name]
  ;; Navigate first: `contact-list` inspects whatever page the browser is
  ;; currently showing, so it must only be called once the search is open.
  (go driver search-url)
  (let [;; Realize the whole listing before visiting any profile.
        ;; `contact-list` is lazy and reads the live page through the same
        ;; driver that `enrich-profile` navigates away from the results.
        listing  (doall (contact-list driver))
        ;; `mapv` is eager, so all browser work is done before writing the file.
        profiles (mapv (comp enrich-profile name-and-link) listing)]
    (spit file-name (export-profiles profiles))))

(defn main!
  "Scrapes the former, then the current, employees of `some-company-id` and
  writes them to \"former-profiles.csv\" and \"current-profiles.csv\"
  (tab-separated, one profile per line).

  You should be logged in beforehand (see `log-in-piotr-yuxuan!`). Returns nil."
  []
  (scrape-to-file! (former-company-employees some-company-id) "former-profiles.csv")
  (scrape-to-file! (current-company-employees some-company-id) "current-profiles.csv"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment