Created
November 8, 2024 09:50
-
-
Save oscarmarina/9c4a61f690d982eca6ca79dd056fa5a4 to your computer and use it in GitHub Desktop.
Node.js script that retrieves and processes NPM package dependents
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import {writeFile} from 'fs/promises'; | |
import * as cheerio from 'cheerio'; | |
// https://github.com/badgen/badgen.net/blob/main/pages/api/npm.ts#L165 | |
// https://github.com/npm/documentation/issues/1231 | |
/** | |
* Prompt: | |
* | |
* Hello! I need a well-structured Node.js script that retrieves and processes NPM package dependents with these specific requirements: | |
* | |
* 1. Input: Accept an array of NPM package names (e.g., ['lit', 'react']). | |
* | |
* 2. HTTP Request: For each package, make requests to `https://www.npmjs.com/browse/depended/${packageName}` using `fetch`. | |
* | |
* 3. HTML Parsing: Use Cheerio (imported as `import * as cheerio from 'cheerio'`) to parse the HTML. Select all dependent package names using the selector `$('h1 + div h3')`, and store them in a `Set` to avoid duplicates. | |
* | |
* 4. Pagination Handling: | |
* - Use the link selector `$('a[href*="?offset="]')` to identify and navigate to the next page if it exists. | |
* - Update the `offset` parameter by increments of 36 for each subsequent page, and continue until no further pages are available. | |
* | |
* 5. Output Format: For each package, return: | |
* - packageName: The name of the package. | |
* - dependents: An array of unique dependent package names. | |
* - totalDependents: Total count of dependents. | |
* | |
* 6. Error Handling: Catch and log specific errors (e.g., network or parsing errors) during the request or parsing phase. | |
* | |
* 7. Efficiency: | |
* - Consider optimizations like concurrency or caching for handling multiple packages. | |
* | |
* 8. Output & Comments: Log results clearly and include helpful comments in the code for maintainability. | |
* | |
* Ensure the code uses ES Modules and async/await, with clear, modular structure and best practices. Thank you! | |
* | |
*/ | |
/**
 * Collects the names of packages that depend on an npm package by scraping
 * the paginated `/browse/depended/<packageName>` listing on npmjs.com.
 *
 * @param {string} packageName - Name of the package whose dependents are listed.
 * @returns {Promise<{packageName: string, dependents: string[], totalDependents: number}>}
 *   The package name, the unique dependent names found, and how many there are.
 * @throws {Error} When a listing page responds with a non-2xx HTTP status.
 */
async function fetchDependents(packageName) {
  const baseUrl = 'https://www.npmjs.com';
  // The listing is walked in offset steps of 36, one step per page.
  const pageSize = 36;
  const listingUrl = `${baseUrl}/browse/depended/${packageName}`;
  const allDependents = new Set();
  let offset = 0;
  let url = listingUrl;

  while (url) {
    const res = await fetch(url);
    if (!res.ok) {
      // Without this check an error page would be parsed like a listing and
      // silently produce an empty or truncated result.
      throw new Error(`Request for ${url} failed with status ${res.status} ${res.statusText}`);
    }

    const $ = cheerio.load(await res.text());
    const countBeforePage = allDependents.size;

    // Each dependent is rendered as an <h3> inside the <div> that follows the page <h1>.
    $('h1 + div h3').each((_, element) => {
      const dependentName = $(element).text().trim();
      if (dependentName) {
        allDependents.add(dependentName);
      }
    });

    const hasPaginationLink = Boolean($('[href*="?offset="]').attr('href'));
    const foundNewDependents = allDependents.size > countBeforePage;

    // The selector matches ANY link carrying an offset query, so it cannot
    // tell "next" apart from other pagination links. Treat a page that adds
    // nothing new as the end of the listing so the loop always terminates.
    if (hasPaginationLink && foundNewDependents) {
      offset += pageSize;
      url = `${listingUrl}?offset=${offset}`;
    } else {
      url = '';
    }
  }

  return {
    packageName,
    dependents: Array.from(allDependents),
    totalDependents: allDependents.size,
  };
}
/**
 * Looks up the dependents of several packages at once.
 *
 * One `fetchDependents` call is started per entry of `packages`; the calls
 * are awaited together with `Promise.all`, so the returned promise rejects
 * as soon as any single lookup rejects.
 *
 * @param {string[]} packages - Package names to look up.
 * @returns {Promise<object[]>} The `fetchDependents` results, in the same
 *   order as `packages`.
 */
async function fetchDependentsForPackages(packages) {
  const pendingLookups = packages.map(fetchDependents);
  return Promise.all(pendingLookups);
}
// Packages whose dependents are collected when this script runs.
const packages = ['lit'];

// Fetch every listing, then persist the combined result as pretty-printed JSON.
// Any failure (fetching or writing) is logged rather than left unhandled.
try {
  const data = await fetchDependentsForPackages(packages);
  const serialized = JSON.stringify(data, null, 2);
  await writeFile('fetch-dependents-for-packages.json', serialized, 'utf8');
} catch (err) {
  console.error('Error fetching dependents:', err);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment