Last active
March 28, 2025 15:19
-
-
Save edjafarov/61cfb26743afeeebf6afb15f907861c8 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <title>Bookmarklet Installer</title>
  <style>
    /* Button-like look for the draggable link; the grab cursor hints that it should be dragged. */
    .bookmarklet {
      display: inline-block;
      padding: 10px 15px;
      background: #0073b1;
      color: white;
      text-decoration: none;
      border-radius: 5px;
      cursor: grab;
    }
  </style>
</head>
<body>
  <h1>Install the Bookmarklet</h1>
  <p>Drag the button below to your bookmarks bar:</p>
  <!-- The link has no href in the markup; the script below assigns the javascript: URL. -->
  <a id="bookmarklet" class="bookmarklet">🔗 LinkedIn Scraper</a>
  <script>
    // Percent-encoded, minified bookmarklet payload (an immediately-invoked function).
    // It must stay on a single line: a raw line break is not allowed inside a "..." literal.
    // The CSV row separator is encoded as %5Cn (the two characters backslash + n), not %0A:
    // a javascript: URL is percent-decoded before it is evaluated, so %0A would turn into a
    // raw line feed inside a double-quoted string and the whole payload would fail to parse.
    const bookmarkletCode = "!function()%7Basync%20function%20e(e%2Ct%3D1e3)%7Blet%20n%3Ddocument.getElementById(e)%3Bif(!n)%7Bconsole.error(%60Container%20with%20ID%20%22%24%7Be%7D%22%20not%20found.%60)%3Breturn%7Dlet%20o%3D0%2Cl%3Breturn%20new%20Promise(e%3D%3E%7Bl%3DsetInterval(()%3D%3E%7Bn.scrollBy(0%2C300%2B100*Math.random())%2Cn.scrollTop%3D%3D%3Do%3F(clearInterval(l)%2Cl%3Dnull%2Cconsole.log(%22Reached%20the%20bottom%20of%20the%20container.%22)%2Ce())%3Ao%3Dn.scrollTop%7D%2Ct%2B1e3*Math.random())%7D)%7Dfunction%20t()%7Blet%20e%3D%5B%5D%2Ct%3Ddocument.querySelectorAll(%22%5Bdata-view-name%3D'search-results-entity'%5D%22)%3Breturn%20t.forEach(t%3D%3E%7Blet%20n%3D%7B%7D%2Co%3Dt.querySelector('span%5Bdata-anonymize%3D%22person-name%22%5D')%3Bn.name%3Do%3Fo.textContent.trim()%3Anull%3Blet%20l%3Dt.querySelector('a%5Bdata-lead-search-result*%3D%22profile-link%22%5D')%3Bn.profileLink%3Dl%3Fl.href%3Anull%3Blet%20r%3Dt.querySelector('span%5Bdata-anonymize%3D%22title%22%5D')%3Bn.jobTitle%3Dr%3Fr.textContent.trim()%3Anull%3Blet%20a%3Dt.querySelector(%22.flex.flex-column%20dl%20.inline-flex.align-items-baseline%20dd%22)%3Bn.about%3Da%3Fa.textContent.trim()%3Anull%3Blet%20i%3Dt.querySelector('a%5Bdata-anonymize%3D%22company-name%22%5D')%3Bn.company%3Di%3Fi.textContent.trim()%3Anull%2Cn.companyLink%3Di%3Fi.href%3Anull%3Blet%20c%3Dt.querySelector('span%5Bdata-anonymize%3D%22location%22%5D')%3Bn.location%3Dc%3Fc.textContent.trim()%3Anull%3Blet%20u%3Dt.querySelector(%22.artdeco-entity-lockup__metadata%22)%3Bn.experience%3Du%3Fu.textContent.trim()%3Anull%3Blet%20s%3Dt.querySelector(%22button%5Bdata-sales-action%5D%22)%3Bn.recentActivity%3Ds%3Fs.textContent.trim()%3Anull%2Ce.push(n)%7D)%2Ce%7Dasync%20function%20n()%7Btry%7Blet%20n%3De%3D%3Enew%20Promise(t%3D%3EsetTimeout(t%2Ce))%2Co%3Ddocument.querySelector(%22.artdeco-pagination__pages%22)%3Bif(!o)throw%20Error(%22Pagination%20container%20not%20found.%22)%3Blet%20l%3Do.querySelector(%22li%5Bdata-test-pagination-page-btn%5D%3Alast-child%20span%22)%2Cr%3Dl%3FparseInt(l.textContent.trim())%3A1%3Bconsole.log(%60Total%20pages%3A%20%24%7Br%7D%60)%3Blet%20a%3D%5B%5D%3Bfor(let%20i%3D1%3Bi%3C%3Dr%3Bi%2B%2B)%7Bconsole.log(%60Processing%20page%20%24%7Bi%7D...%60)%3Blet%20c%3Ddocument.querySelector(%22%23search-results-container%22)%3Bif(!c)throw%20Error(%22Search%20results%20container%20not%20found.%22)%3Bawait%20e(%22search-results-container%22)%3Blet%20u%3Dt()%3Ba.push(...u)%2Cconsole.log(%60Extracted%20%24%7Bu.length%7D%20profiles%20from%20page%20%24%7Bi%7D.%60)%3Blet%20s%3Ddocument.querySelector(%22.artdeco-pagination__button--next%3Anot(%5Bdisabled%5D)%22)%3Bif(s%26%26i%3Cr)s.click()%2Cawait%20n(5e3)%3Belse%20if(!s%26%26i%3Cr)throw%20Error(%22Next%20page%20button%20not%20found%20or%20disabled.%22)%7Dreturn%20console.log(%22Scraping%20completed.%22)%2Cconsole.log(%22Extracted%20profiles%3A%22%2Ca)%2Ca.filter(e%3D%3Ee.name)%7Dcatch(p)%7Breturn%20console.error(%22An%20error%20occurred%3A%22%2Cp)%2C%5B%5D%7D%7Dn().then(e%3D%3E%7Be.length%3E0%26%26function%20e(t)%7Blet%20n%3Dt.map(e%3D%3E%5Be.name%7C%7C%22%22%2Ce.profileLink%7C%7C%22%22%2Ce.jobTitle%7C%7C%22%22%2Ce.about%7C%7C%22%22%2Ce.company%7C%7C%22%22%2Ce.companyLink%7C%7C%22%22%2Ce.location%7C%7C%22%22%2Ce.experience%7C%7C%22%22%2Ce.recentActivity%7C%7C%22%22%2C%5D.map(e%3D%3E%60%22%24%7Be.replace(%2F%22%2Fg%2C'%22%22')%7D%22%60).join(%22%2C%22))%2Co%3D%5B%22Name%2CProfile%20Link%2CJob%20Title%2CAbout%2CCompany%2CCompany%20Link%2CLocation%2CExperience%2CRecent%20Activity%22%2C...n%5D.join(%22%5Cn%22)%2Cl%3Dnew%20Blob(%5Bo%5D%2C%7Btype%3A%22text%2Fcsv%3Bcharset%3Dutf-8%3B%22%7D)%2Cr%3Ddocument.createElement(%22a%22)%2Ca%3DURL.createObjectURL(l)%3Br.setAttribute(%22href%22%2Ca)%2Cr.setAttribute(%22download%22%2C%60linkedin_profiles_%24%7Bnew%20Date().toISOString().split(%22T%22)%5B0%5D%7D.csv%60)%2Cr.style.visibility%3D%22hidden%22%2Cdocument.body.appendChild(r)%2Cr.click()%2Cdocument.body.removeChild(r)%7D(e)%7D)%7D()%3B";
    // Attach the payload to the link as a javascript: URL so that dragging the link
    // to the bookmarks bar stores the script as a bookmark.
    const a = document.getElementById('bookmarklet');
    a.setAttribute('href', 'javascript:' + bookmarkletCode);
  </script>
</body>
</html>
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Scrolls a container downwards in randomized steps until a step no longer
 * changes its `scrollTop`, which is treated as having reached the bottom.
 *
 * @param {string} containerId - `id` of the element to scroll.
 * @param {number} [interval=1000] - Base pause between steps in milliseconds;
 *   up to 1000 ms of random jitter is added to every individual pause.
 * @returns {Promise<void>} Resolves once scrolling stops making progress.
 *   Resolves right away (after logging an error) when the element is missing.
 */
async function scrollToBottom(containerId, interval = 1000) {
  const container = document.getElementById(containerId);
  if (!container) {
    console.error(`Container with ID "${containerId}" not found.`);
    return;
  }

  const scrollStep = 300; // Base distance per step, in pixels
  let lastScrollTop = 0;

  return new Promise((resolve) => {
    // A re-armed setTimeout is used instead of setInterval so that the jitter
    // is drawn again for every pause; setInterval would evaluate it only once.
    function scheduleNextStep() {
      setTimeout(step, interval + Math.random() * 1000);
    }

    function step() {
      container.scrollBy(0, scrollStep + Math.random() * 100);

      // No movement since the previous step means there is nothing left to scroll.
      if (container.scrollTop === lastScrollTop) {
        console.log("Reached the bottom of the container.");
        resolve();
        return;
      }

      lastScrollTop = container.scrollTop;
      scheduleNextStep();
    }

    scheduleNextStep();
  });
}
// Example usage (disabled): scroll the search results container
// scrollToBottom("search-results-container", 1000);
/**
 * Reads every search result card currently in the document and returns one
 * plain record per card.
 *
 * Cards are the elements matching `[data-view-name='search-results-entity']`.
 * A field whose element is not present in a card is set to `null`.
 *
 * @returns {Array<{name: ?string, profileLink: ?string, jobTitle: ?string,
 *   about: ?string, company: ?string, companyLink: ?string, location: ?string,
 *   experience: ?string, recentActivity: ?string}>} One record per card, in
 *   document order.
 */
function extractProfiles() {
  // Trimmed text of the first match inside `root`, or null when nothing matches.
  const textOf = (root, selector) => {
    const element = root.querySelector(selector);
    return element ? element.textContent.trim() : null;
  };

  // `href` of the first match inside `root`, or null when nothing matches.
  const hrefOf = (root, selector) => {
    const element = root.querySelector(selector);
    return element ? element.href : null;
  };

  // The company anchor supplies both the company name and its link.
  const companySelector = 'a[data-anonymize="company-name"]';

  const cards = document.querySelectorAll(
    "[data-view-name='search-results-entity']"
  );

  return Array.from(cards, (card) => ({
    name: textOf(card, 'span[data-anonymize="person-name"]'),
    profileLink: hrefOf(card, 'a[data-lead-search-result*="profile-link"]'),
    jobTitle: textOf(card, 'span[data-anonymize="title"]'),
    about: textOf(
      card,
      ".flex.flex-column dl .inline-flex.align-items-baseline dd"
    ),
    company: textOf(card, companySelector),
    companyLink: hrefOf(card, companySelector),
    location: textOf(card, 'span[data-anonymize="location"]'),
    experience: textOf(card, ".artdeco-entity-lockup__metadata"),
    recentActivity: textOf(card, "button[data-sales-action]"),
  }));
}
// Example usage (disabled): extract the profiles and log the results
// const extractedProfiles = extractProfiles();
// console.log(extractedProfiles);
/**
 * Serializes profile records to CSV and triggers a browser download named
 * `linkedin_profiles_<YYYY-MM-DD>.csv`.
 *
 * Every data cell is wrapped in double quotes with embedded quotes doubled;
 * the header row is written unquoted. Missing or falsy fields become empty
 * cells.
 *
 * @param {Array<Object>} profiles - Records carrying the keys listed in the
 *   column table below (e.g. `name`, `profileLink`, `jobTitle`).
 * @returns {void}
 */
function downloadProfilesAsCSV(profiles) {
  // Single source of truth for column order: [header label, record key].
  const columns = [
    ["Name", "name"],
    ["Profile Link", "profileLink"],
    ["Job Title", "jobTitle"],
    ["About", "about"],
    ["Company", "company"],
    ["Company Link", "companyLink"],
    ["Location", "location"],
    ["Experience", "experience"],
    ["Recent Activity", "recentActivity"],
  ];

  // String() keeps a truthy non-string value (e.g. a number) from throwing on .replace.
  const quoteCell = (value) => `"${String(value).replace(/"/g, '""')}"`;

  const headerRow = columns.map(([label]) => label).join(",");
  const dataRows = profiles.map((profile) =>
    columns.map(([, key]) => quoteCell(profile[key] || "")).join(",")
  );
  const csvContent = [headerRow, ...dataRows].join("\n");

  const blob = new Blob([csvContent], { type: "text/csv;charset=utf-8;" });
  const url = URL.createObjectURL(blob);

  // A temporary hidden anchor with a `download` attribute starts the download.
  const link = document.createElement("a");
  link.setAttribute("href", url);
  link.setAttribute(
    "download",
    `linkedin_profiles_${new Date().toISOString().split("T")[0]}.csv`
  );
  link.style.visibility = "hidden";
  document.body.appendChild(link);
  try {
    link.click();
  } finally {
    document.body.removeChild(link);
    // Release the blob URL so the CSV is not kept alive for the page's lifetime;
    // deferred so the click has been handled before the URL is invalidated.
    setTimeout(() => URL.revokeObjectURL(url), 0);
  }
}
/**
 * Walks through every page of the search results, scrolling each page to the
 * bottom and collecting its profiles.
 *
 * The page count is read from the last numbered pagination button. While the
 * run is in progress the accumulated records are also published on
 * `window.profiles`.
 *
 * @returns {Promise<Array<Object>>} The collected records that have a name.
 *   On any failure the error is logged and an empty array is returned, which
 *   matches what the bookmarklet build of this function returns and keeps
 *   callers that read `.length` from crashing.
 */
async function scrapeProfiles() {
  // Resolves after `ms` milliseconds.
  const wait = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  try {
    const paginationContainer = document.querySelector(
      ".artdeco-pagination__pages"
    );
    if (!paginationContainer) {
      throw new Error("Pagination container not found.");
    }

    // The last numbered button carries the total page count.
    const lastPageButton = paginationContainer.querySelector(
      "li[data-test-pagination-page-btn]:last-child span"
    );
    const parsedPages = lastPageButton
      ? Number.parseInt(lastPageButton.textContent.trim(), 10)
      : 1;
    // A non-numeric label would give NaN and silently skip the loop below;
    // fall back to scraping at least the page that is currently shown.
    const totalPages =
      Number.isNaN(parsedPages) || parsedPages < 1 ? 1 : parsedPages;
    console.log(`Total pages: ${totalPages}`);

    const profiles = [];
    for (let page = 1; page <= totalPages; page++) {
      console.log(`Processing page ${page}...`);

      const resultsContainer = document.querySelector(
        "#search-results-container"
      );
      if (!resultsContainer) {
        throw new Error("Search results container not found.");
      }
      // Scroll the results container to the bottom before reading the cards.
      await scrollToBottom("search-results-container");

      const profileElements = extractProfiles();
      profiles.push(...profileElements);
      // Expose the running result so it can be inspected from the console.
      window.profiles = profiles;
      console.log(
        `Extracted ${profileElements.length} profiles from page ${page}.`
      );

      // Nothing to navigate to after the last page.
      if (page >= totalPages) {
        continue;
      }
      const nextPageButton = document.querySelector(
        ".artdeco-pagination__button--next:not([disabled])"
      );
      if (!nextPageButton) {
        throw new Error("Next page button not found or disabled.");
      }
      nextPageButton.click();
      await wait(5000); // Fixed pause to let the next page load
    }

    console.log("Scraping completed.");
    console.log("Extracted profiles:", profiles);
    // Records without a name are dropped from the returned list.
    return profiles.filter((profile) => profile.name);
  } catch (error) {
    console.error("An error occurred:", error);
    return [];
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment