Skip to content

Instantly share code, notes, and snippets.

If you don't know the answer, try searching for it on the internet. You can do so by running the following command:
```
npx @uratmangun/scraper-tool search "<query>"
```
Let's say the query is "how to install frog.fm".
```
npx @uratmangun/scraper-tool search "how to install frog.fm"
{
"name": "@uratmangun/scraper-tool",
"version": "1.0.0",
"description": "A CLI tool for web scraping and Google search",
"type": "module",
"bin": {
"@uratmangun/scraper-tool": "./bin/cli.js"
},
"files": [
"bin",
#!/usr/bin/env node
import { fileURLToPath } from 'url';
import { dirname, join } from 'path';
import { spawn } from 'child_process';
// ES modules do not provide __filename/__dirname, so derive them from import.meta.url.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
// Location of scripts/scrape-or-search.mjs, resolved relative to this file's parent directory.
const scriptPath = join(__dirname, '..', 'scripts', 'scrape-or-search.mjs');
import { chromium } from 'playwright';
import { fileURLToPath } from 'url';
import dotenv from 'dotenv';
import { searchParser } from './convert-search.mjs';
dotenv.config({ path: '.env.local' });
/**
* Scrapes content from a given URL using Playwright with CDP connection
* @param {string} url - The URL to scrape
import OpenAI from "openai"
import dotenv from 'dotenv';
import { zodResponseFormat } from "openai/helpers/zod";
import { z } from "zod";
dotenv.config({ path: '.env.local' });
// OpenAI SDK client pointed at the Google Generative Language ".../v1beta/openai/" base URL
// instead of the SDK's default endpoint; the API key is read from the GEMINI_API_KEY
// environment variable.
const openai = new OpenAI({
baseURL: "https://generativelanguage.googleapis.com/v1beta/openai/",
apiKey: process.env.GEMINI_API_KEY
})
const SearchResult = z.array(
import { chromium } from 'playwright';
import dotenv from 'dotenv';
// Load environment variables
dotenv.config({ path: '.env.local' });
async function getGoogleHtml(query = '', baseUrl = 'https://www.google.com/search?q=') {
const browser = await chromium.connectOverCDP(process.env.BRIGHT_PLAYWRIGHT_URL);
const searchUrl = baseUrl + encodeURIComponent(query);
import { chromium } from 'playwright';
import { fileURLToPath } from 'url';
import dotenv from 'dotenv';
dotenv.config({ path: '.env.local' });
/**
* Collects product details from each 'div.list-product-items' element on the given page
* @param {object} page - The Playwright page to query (used via page.$$eval)
* @returns {Promise<string>} - The scraped content
*/
async function getAllItems(page) {
const products = await page.$$eval('div.list-product-items', (productElements) => {
return productElements.map(productElement => {
const titleElement = productElement.querySelector('p.product_name');
const priceElement = productElement.querySelector('p.price span');
const imageElement = productElement.querySelector('img');
const availabilityElement = productElement.querySelector('.availability-label');
const title = titleElement?.textContent?.trim() || null;
async function getCategories(page) {
console.log('Navigating to https://alfagift.id/');
await page.goto('https://alfagift.id/');
console.log('Waiting for response...');
// Wait for the specific categories menu container
// await page.waitForSelector('//*[@id="__layout"]/div/div[1]/header[1]/div[2]/div/div/div[1]/div[1]/div/div/div/div[2]/div/div/div');
// Get and log the content of the categories container
const containerContent = await page.evaluate(() => {
const container = document.evaluate(
import * as pw from 'playwright';
// Credentials interpolated into the WebSocket URL below; empty placeholder in this file.
// NOTE(review): presumably expects "username:password" for the proxy account — confirm with the provider's docs.
const AUTH = '';
// WebSocket endpoint (host brd.superproxy.io, port 9222) handed to chromium.connectOverCDP in main().
const SBR_CDP = `wss://${AUTH}@brd.superproxy.io:9222`;
async function main() {
console.log('Connecting to Scraping Browser...');
const browser = await pw.chromium.connectOverCDP(SBR_CDP);
try {
console.log('Connected! Navigating...');
const page = await browser.newPage();
await page.goto('https://example.com', { timeout: 2 * 60 * 1000 });