Last active
October 7, 2025 09:58
-
-
Save soup-bowl/db9664f60e4b8500fc7bd077f052d9ba to your computer and use it in GitHub Desktop.
Scrape bin collection data from Three Rivers District Council site - Puppeteer script for Browserless
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // https://gist.github.com/soup-bowl/db9664f60e4b8500fc7bd077f052d9ba | |
| export default async ({ page }) => { | |
| const POSTCODE = ''; // e.g. "WD3 1AB" | |
| const ADDRESS_TEXT = ''; // e.g. "10 High Street" | |
| const SELECTORS = { | |
| anonBtn: '#btn-anon', | |
| acceptBtn: 'button.consent-accept.btn-success', | |
| formIframe: '#fillform-frame-1', | |
| postcode: '#postcode_search', | |
| addressSelect: '#chooseAddress', | |
| nextBtn: 'button.btn-af.nextbutton', | |
| calendarRows: | |
| 'div[data-field-name="subCollectionCalendar"] table.repeatable-table tbody tr.repeatable-value', | |
| }; | |
/**
 * Clicks an element and waits for the page navigation the click may trigger.
 *
 * The navigation is optional: if the wait rejects, the rejection is absorbed
 * and the function still resolves once the click itself has completed.
 * Rejections that do not look like a timeout are reported via console.warn
 * rather than being dropped silently.
 *
 * @param {object} page - Puppeteer page to act on.
 * @param {string} selector - CSS selector of the element to click.
 * @returns {Promise<void>}
 * @throws If the element never becomes visible or the click itself fails.
 */
async function clickAndMaybeNavigate(page, selector) {
  await page.waitForSelector(selector, { visible: true });

  // Start listening before clicking so a fast navigation is not missed.
  const navigation = page.waitForNavigation({ waitUntil: 'networkidle2' }).catch((err) => {
    const looksLikeTimeout =
      err?.name === 'TimeoutError' || /timeout|exceeded/i.test(String(err?.message));
    if (!looksLikeTimeout) {
      // Still best-effort, but leave a trace for anything unexpected.
      console.warn(`Navigation after clicking ${selector} failed: ${err?.message ?? err}`);
    }
  });

  await Promise.all([navigation, page.click(selector)]);
}
/**
 * Waits for an iframe element to become visible and returns its content frame.
 *
 * @param {object} page - Puppeteer page containing the iframe.
 * @param {string} iframeSelector - CSS selector of the iframe element.
 * @returns {Promise<object>} The frame hosted by the iframe element.
 * @throws {Error} If the element has no content frame.
 */
async function getFrameBySelector(page, iframeSelector) {
  const handle = await page.waitForSelector(iframeSelector, { visible: true });
  try {
    const frame = await handle.contentFrame();
    if (!frame) throw new Error(`Iframe not loaded: ${iframeSelector}`);
    return frame;
  } finally {
    // The element handle is only needed to reach the frame; release it so it
    // does not linger for the rest of the session.
    await handle.dispose();
  }
}
/**
 * Replaces the content of an input and fires the events a form expects.
 *
 * Sequence: wait for the field, click it, clear its value, type the new value
 * key by key (skipped when `value` is empty), then dispatch bubbling `input`
 * and `change` events and blur the field.
 *
 * @param {object} frame - Puppeteer frame (or page) that contains the field.
 * @param {string} selector - CSS selector of the input element.
 * @param {string} value - Text to type; a falsy value leaves the field empty.
 * @returns {Promise<void>}
 * @throws {Error} If the element is gone by the time it is cleared or notified.
 */
async function typeWithChange(frame, selector, value) {
  await frame.waitForSelector(selector, { visible: true });

  // Focus the field, then wipe whatever it currently holds.
  await frame.click(selector, { clickCount: 1 });
  await frame.evaluate((sel) => {
    const el = document.querySelector(sel);
    if (!el) throw new Error(`Element disappeared before it could be cleared: ${sel}`);
    el.value = '';
  }, selector);

  if (value) {
    await frame.type(selector, value, { delay: 60 });
  }

  // Notify listeners explicitly and blur so change/validation handlers run.
  await frame.evaluate((sel) => {
    const el = document.querySelector(sel);
    if (!el) throw new Error(`Element disappeared before events could be fired: ${sel}`);
    for (const type of ['input', 'change']) {
      el.dispatchEvent(new Event(type, { bubbles: true }));
    }
    el.blur();
  }, selector);
}
/**
 * Waits until a <select> is visible and holds at least `minOptions` options.
 *
 * @param {object} frame - Puppeteer frame (or page) that contains the select.
 * @param {string} selectSel - CSS selector of the select element.
 * @param {number} [minOptions=2] - Minimum number of options to wait for.
 * @param {number} [timeoutMs=30000] - How long to wait for the options, in ms.
 * @returns {Promise<void>}
 */
async function waitForOptions(frame, selectSel, minOptions = 2, timeoutMs = 30000) {
  await frame.waitForSelector(selectSel, { visible: true });
  await frame.waitForFunction(
    (sel, min) => {
      const el = document.querySelector(sel);
      return Boolean(el?.options) && el.options.length >= min;
    },
    // Re-evaluate whenever the DOM mutates rather than on a timer.
    { polling: 'mutation', timeout: timeoutMs },
    selectSel,
    minOptions
  );
}
/**
 * Picks an option in an address <select> and fires `input`/`change` on it.
 *
 * Matching is case-insensitive and prefers, in order: an option whose text
 * equals `visibleText`, one whose text starts with it, then the first one that
 * merely contains it — so "2 High Street" is not captured by an earlier
 * "12 High Street". When nothing matches (or no text is given) the first
 * enabled option with a non-blank value is used instead.
 *
 * @param {object} frame - Puppeteer frame (or page) that contains the select.
 * @param {string} selectSel - CSS selector of the select element.
 * @param {string} [visibleText] - Text of the wanted option; falsy means "first usable option".
 * @returns {Promise<{value: string, text: string, matched: boolean} | null>}
 *   The chosen option (`matched` is false when the fallback was used), or
 *   null when the select offered nothing selectable.
 */
async function selectAddress(frame, selectSel, visibleText) {
  const outcome = await frame.evaluate(
    (sel, text) => {
      const select = document.querySelector(sel);
      const opts = Array.from(select.options);
      const label = (o) => (o.textContent || '').toLowerCase();
      const needle = text.toLowerCase();

      let target;
      if (needle) {
        target =
          opts.find((o) => label(o).trim() === needle) ||
          opts.find((o) => label(o).trim().startsWith(needle)) ||
          opts.find((o) => label(o).includes(needle));
      }
      const matched = Boolean(target);
      if (!target) {
        target = opts.find((o) => !o.disabled && o.value && o.value.trim() !== '');
      }
      if (!target) return null;

      select.value = target.value;
      select.dispatchEvent(new Event('input', { bubbles: true }));
      select.dispatchEvent(new Event('change', { bubbles: true }));
      return { value: target.value, text: (target.textContent || '').trim(), matched };
    },
    selectSel,
    visibleText || ''
  );

  // Make the silent paths visible: a fallback may mean the wrong property.
  if (!outcome) {
    console.warn(`No selectable option found in ${selectSel}`);
  } else if (visibleText && !outcome.matched) {
    console.warn(
      `No option in ${selectSel} contains "${visibleText}"; fell back to "${outcome.text}"`
    );
  }
  return outcome;
}
/**
 * Advances the form: waits for the Next button, presses Tab, pauses briefly,
 * then clicks the button inside the frame.
 *
 * @param {object} page - Puppeteer page; used for the page-level Tab key press.
 * @param {object} frame - Frame that contains the Next button.
 * @param {string} nextSelector - CSS selector of the Next button.
 * @param {number} [settleMs=500] - Pause between the Tab press and the click, in ms.
 * @returns {Promise<void>}
 */
async function clickNext(page, frame, nextSelector, settleMs = 500) {
  // Ensure the button exists before touching focus.
  await frame.waitForSelector(nextSelector, { visible: true });

  // Tab is sent through the page keyboard (works even if focus is in the iframe).
  await page.keyboard.press('Tab');
  await sleep(settleMs); // allow blur/validation to settle

  await frame.click(nextSelector, { clickCount: 1 });
}
/**
 * Polls every frame of the page until one of them contains `selector`.
 *
 * Each pass probes all current frames; a frame whose lookup throws (for
 * example because it is detaching or navigating) is skipped for that pass
 * instead of aborting the search. At least one pass is always made, even
 * when `timeoutMs` is zero.
 *
 * @param {object} page - Puppeteer page whose frames are searched.
 * @param {string} selector - CSS selector to look for.
 * @param {number} [timeoutMs=30000] - Maximum time to keep polling, in ms.
 * @returns {Promise<object>} The first frame in which the selector was found.
 * @throws {Error} When no frame contains the selector before the deadline;
 *   the last lookup error seen, if any, is attached as `cause`.
 */
async function findFrameWithSelector(page, selector, timeoutMs = 30000) {
  const deadline = Date.now() + timeoutMs;
  let lastError;

  for (;;) {
    for (const f of page.frames()) {
      let el = null;
      try {
        el = await f.$(selector);
      } catch (err) {
        // Keep polling; remember the failure for the timeout report.
        lastError = err;
      }
      if (el) {
        // Only the frame is needed, so release the element handle.
        await el.dispose();
        return f;
      }
    }
    if (Date.now() >= deadline) break;
    await sleep(200);
  }

  throw new Error(
    `Timed out after ${timeoutMs}ms waiting for selector in any frame: ${selector}`,
    lastError ? { cause: lastError } : undefined
  );
}
/**
 * Reads the collection calendar rows out of a frame.
 *
 * For every element matching `rowSelector`, the text of the last <span> in
 * the second and third `td.value` cells is taken as the bin type and the next
 * collection date respectively. Missing cells or spans yield empty strings.
 *
 * @param {object} frame - Puppeteer frame (or page) that contains the table.
 * @param {string} rowSelector - CSS selector matching the table rows.
 * @returns {Promise<Array<{'Bin type': string, 'Next collection date': string}>>}
 */
async function scrapeCalendarRows(frame, rowSelector) {
  await frame.waitForSelector(rowSelector, { visible: true });

  return frame.evaluate((sel) => {
    // Trimmed text of the cell's last <span>, or '' when either is absent.
    const lastSpanText = (cell) =>
      (cell?.querySelector('span:last-child')?.textContent || '').trim();

    return Array.from(document.querySelectorAll(sel), (tr) => {
      const cells = tr.querySelectorAll('td.value');
      return {
        'Bin type': lastSpanText(cells[1]),
        'Next collection date': lastSpanText(cells[2]),
      };
    });
  }, rowSelector);
}
/**
 * Returns a promise that resolves after roughly `ms` milliseconds.
 *
 * @param {number} ms - Delay in milliseconds, passed straight to setTimeout.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
| // ----------------- | |
| // 1) Entry + opt out of login + accept | |
| await page.goto( | |
| 'https://my.threerivers.gov.uk/AchieveForms/?mode=fill&form_uri=sandbox-publish://AF-Process-52df96e3-992a-4b39-bba3-06cfaabcb42b/AF-Stage-01ee28aa-1584-442c-8d1f-119b6e27114a/definition.json', | |
| { waitUntil: 'networkidle2' } | |
| ); | |
| await clickAndMaybeNavigate(page, SELECTORS.anonBtn); | |
| await clickAndMaybeNavigate(page, SELECTORS.acceptBtn); | |
| // 2) Work inside the form iframe | |
| const frame = await getFrameBySelector(page, SELECTORS.formIframe); | |
| // 3) Postcode → wait for address options → select address | |
| await typeWithChange(frame, SELECTORS.postcode, POSTCODE); | |
| await page.keyboard.press('Tab'); | |
| await sleep(500); | |
| await waitForOptions(frame, SELECTORS.addressSelect, 2); | |
| await selectAddress(frame, SELECTORS.addressSelect, ADDRESS_TEXT); | |
| await page.keyboard.press('Tab'); | |
| await sleep(500); | |
| // 4) Next stage | |
| await clickNext(page, frame, SELECTORS.nextBtn); | |
| // 5) Find the table (any frame), then scrape it | |
| const tableFrame = await findFrameWithSelector(page, SELECTORS.calendarRows, 30000); | |
| const rows = await scrapeCalendarRows(tableFrame, SELECTORS.calendarRows); | |
| // Return JSON array of objects | |
| console.log(JSON.stringify(rows, null, 2)); | |
| return rows; | |
| }; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment