Created
May 16, 2024 17:03
-
-
Save pereayats/ebf4ff427af82434b9296b436642f29b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const axios = require('axios')
const cheerio = require('cheerio')
const fs = require('fs')
/**
 * Pause for a fixed amount of time.
 *
 * Despite the name, this does not wait for a condition: it simply resolves
 * after the given delay, which callers use to space out consecutive requests.
 *
 * @param {number} t - Delay in milliseconds, handed straight to setTimeout.
 * @returns {Promise<void>} Promise that resolves (with no value) once the timer fires.
 */
function waitUntil(t) {
  return new Promise(resolve => {
    setTimeout(resolve, t)
  })
}
/**
 * Download a URL with axios and return the response body.
 *
 * This is a best-effort fetch: any failure (network error, non-2xx status
 * rejected by axios, ...) is logged and reported to the caller as null
 * instead of being thrown.
 *
 * @param {string} url - Address to request with axios.get.
 * @returns {Promise<*|null>} The `data` field of the axios response, or null when the request failed.
 */
async function getWebsiteContent(url) {
  try {
    const response = await axios.get(url)
    return response.data
  }
  catch (error) {
    // Callers treat null as "skip this page"; log the reason so failures are not invisible.
    console.error(`Failed to fetch ${url}: ${error?.message ?? error}`)
    return null
  }
}
/**
 * Collect the category page links listed on the SASI categories index.
 *
 * Fetches the index page, looks at every anchor matched by
 * `.container .card a` and keeps the hrefs that contain
 * `sasi.unionworks.co.uk/category`.
 *
 * @returns {Promise<string[]>} Unique category hrefs in the order they first
 *   appear on the page; an empty array when the page could not be fetched.
 *   If parsing fails part-way, whatever was collected so far is returned.
 */
async function scrapeAppsCategories() {
  const url = 'https://sasi.unionworks.co.uk/categories'
  // A Set keeps first-seen order and removes duplicates without rescanning an array.
  const categories = new Set()

  try {
    const html_content = await getWebsiteContent(url)
    if (html_content) {
      const $ = cheerio.load(html_content)
      $('.container .card a').each((i, link) => {
        const href = $(link).attr('href')
        if (href && href.includes('sasi.unionworks.co.uk/category')) {
          categories.add(href)
        }
      })
    }
  }
  catch (error) {
    console.error(`Failed to scrape categories from ${url}: ${error?.message ?? error}`)
  }

  return [...categories]
}
/**
 * Add every Shopify App Store link found on one parsed listing page to `links`.
 *
 * @param {Function} $ - Cheerio instance returned by cheerio.load for the page.
 * @param {Set<string>} links - Accumulator of hrefs; mutated in place.
 */
function collectShopifyLinks($, links) {
  $('small a').each((i, link) => {
    const href = $(link).attr('href')
    if (href && href.includes('apps.shopify.com')) {
      links.add(href)
    }
  })
}

/**
 * Gather the Shopify App Store links listed under one category, following
 * its pagination.
 *
 * The first page is fetched to read the pagination links and its own app
 * links; pages 2..last are then requested one after another with a `page`
 * query parameter. Pages that fail to download are skipped.
 *
 * @param {string} url - Category page URL.
 * @returns {Promise<string[]>} Unique hrefs containing `apps.shopify.com`, in
 *   first-seen order; an empty array when the first page cannot be fetched.
 *   If an error occurs part-way, the links collected so far are returned.
 */
async function findAppLinksByCategory(url) {
  const links = new Set()

  try {
    let html_content = await getWebsiteContent(url)
    if (html_content) {
      const $ = cheerio.load(html_content)

      // The highest numeric pagination label is the last page; non-numeric
      // labels (arrows, "Next", ...) convert to NaN or 0 and are ignored.
      let last_page = 1
      $('.pagination .page-item a').each((i, element) => {
        const page = Number($(element).html())
        if (Number.isInteger(page) && page > last_page) last_page = page
      })

      collectShopifyLinks($, links)

      // Do not start a second query string if the URL already carries one.
      const separator = url.includes('?') ? '&' : '?'
      for (let page = 2; page <= last_page; page++) {
        html_content = await getWebsiteContent(`${url}${separator}page=${page}`)
        if (html_content) {
          collectShopifyLinks(cheerio.load(html_content), links)
        }
      }
    }
  }
  catch (error) {
    console.error(`Failed to collect app links from ${url}: ${error?.message ?? error}`)
  }

  return [...links]
}
/**
 * Scrape basic details about one app from its listing page.
 *
 * Fields are only set when the matching element exists on the page:
 *  - `_id`, `name`, `url`: set from each `h1` (if several exist, the last one
 *    wins); `_id` is the lower-cased last path segment of `url`.
 *  - `vendor_name`, `vendor_url`, `vendor_id`: taken from the first link whose
 *    href contains `/partners/`.
 *  - `icon_url`: the first image whose src contains
 *    `cdn.shopify.com/app-store/listing_images`.
 *
 * @param {string} url - App listing URL.
 * @returns {Promise<Object|null>} The collected fields (possibly an empty
 *   object when nothing matched), or null when the page could not be fetched
 *   or an error occurred while parsing it.
 */
async function getAppInfo(url) {
  try {
    const html_content = await getWebsiteContent(url)
    if (!html_content) return null

    const $ = cheerio.load(html_content)
    const info = {}

    $('h1').each((i, h1) => {
      info._id = url.split('/').pop().toLowerCase()
      info.name = $(h1).html()?.trim()
      info.url = url
    })

    $('a[href*="/partners/"]').each((i, link) => {
      // Only the first partner link describes the vendor; ignore the rest.
      if (info.vendor_url || info.vendor_name) return

      const href = $(link).attr('href')
      info.vendor_name = $(link).html()?.trim()
      // Resolve against the store origin rather than concatenating, so an
      // href that is already absolute does not end up with a doubled origin.
      info.vendor_url = new URL(href, 'https://apps.shopify.com').href
      info.vendor_id = info.vendor_url.split('/').pop().toLowerCase()
    })

    $('img[src*="cdn.shopify.com/app-store/listing_images"]').each((i, img) => {
      const src = $(img).attr('src')
      if (!info.icon_url && src) info.icon_url = src
    })

    return info
  }
  catch (error) {
    console.error(`Failed to scrape app info from ${url}: ${error?.message ?? error}`)
    return null
  }
}
/**
 * Script entry point, started immediately.
 *
 * 1. Collects the category pages.
 * 2. Collects the app links of every category (1 s pause before each request).
 * 3. Scrapes each distinct app page (5 s pause before each request).
 * 4. Writes the collected records to `shopify_apps.json` as pretty-printed JSON.
 *
 * `main` is the promise for the whole run; a failure is logged and reflected
 * in the process exit code rather than surfacing as an unhandled rejection.
 */
const main = (async () => {
  const categories = await scrapeAppsCategories()

  // The same app can be listed under several categories; a Set makes sure
  // it is only fetched (and written) once.
  const apps = new Set()
  for (const category of categories) {
    await waitUntil(1000)
    const links = await findAppLinksByCategory(category)
    for (const link of links) apps.add(link)
  }

  const output = []
  for (const app of apps) {
    await waitUntil(5000)
    const info = await getAppInfo(app)
    if (info) output.push(info)
  }

  fs.writeFileSync('shopify_apps.json', JSON.stringify(output, null, 2))
})().catch(error => {
  console.error(`Scrape failed: ${error?.message ?? error}`)
  process.exitCode = 1
})
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment