Skip to content

Instantly share code, notes, and snippets.

@soup-bowl
Last active October 7, 2025 09:58
Show Gist options
  • Select an option

  • Save soup-bowl/db9664f60e4b8500fc7bd077f052d9ba to your computer and use it in GitHub Desktop.

Select an option

Save soup-bowl/db9664f60e4b8500fc7bd077f052d9ba to your computer and use it in GitHub Desktop.
Scrape bin collection data from Three Rivers District Council site - Puppeteer script for Browserless
// https://gist.github.com/soup-bowl/db9664f60e4b8500fc7bd077f052d9ba
export default async ({ page }) => {
// Postcode typed into the lookup field, e.g. "WD3 1AB".
const POSTCODE = '';
// Text matched against the address dropdown entries, e.g. "10 High Street".
const ADDRESS_TEXT = '';
// CSS selectors for every element the script touches, keyed by role.
const SELECTORS = {
  anonBtn: '#btn-anon',
  acceptBtn: 'button.consent-accept.btn-success',
  formIframe: '#fillform-frame-1',
  postcode: '#postcode_search',
  addressSelect: '#chooseAddress',
  nextBtn: 'button.btn-af.nextbutton',
  // Descendant chain from the calendar field down to each result row.
  calendarRows: [
    'div[data-field-name="subCollectionCalendar"]',
    'table.repeatable-table',
    'tbody',
    'tr.repeatable-value',
  ].join(' '),
};
/**
 * Waits for `selector` to be visible, clicks it, and waits for a navigation
 * that the click may trigger.
 *
 * Any rejection from `waitForNavigation` is ignored, so a click that does not
 * navigate is not an error. A rejection from the click itself still propagates.
 *
 * @param {object} page - Puppeteer page.
 * @param {string} selector - CSS selector of the element to click.
 * @returns {Promise<void>}
 */
async function clickAndMaybeNavigate(page, selector) {
  await page.waitForSelector(selector, { visible: true });

  // Start listening before the click so a navigation it triggers is not missed.
  const navigation = page
    .waitForNavigation({ waitUntil: 'networkidle2' })
    .catch(() => { });
  const click = page.click(selector);

  await Promise.all([navigation, click]);
}
/**
 * Resolves the frame hosted by the iframe element matching `iframeSelector`.
 *
 * @param {object} page - Puppeteer page.
 * @param {string} iframeSelector - CSS selector of the iframe element.
 * @returns {Promise<object>} The iframe's content frame.
 * @throws {Error} If the element has no content frame.
 */
async function getFrameBySelector(page, iframeSelector) {
  const iframeHandle = await page.waitForSelector(iframeSelector, { visible: true });
  const contentFrame = await iframeHandle.contentFrame();
  if (contentFrame) {
    return contentFrame;
  }
  throw new Error(`Iframe not loaded: ${iframeSelector}`);
}
/**
 * Replaces the content of an input inside `frame` and notifies the page.
 *
 * Sequence: wait for the field, click it, blank its value, type `value`
 * (skipped when `value` is falsy), then dispatch bubbling `input` and
 * `change` events and blur the field.
 *
 * @param {object} frame - Puppeteer frame containing the field.
 * @param {string} selector - CSS selector of the field.
 * @param {string} value - Text to type.
 * @returns {Promise<void>}
 */
async function typeWithChange(frame, selector, value) {
  await frame.waitForSelector(selector, { visible: true });

  // Focus the field, then drop whatever it currently holds.
  await frame.click(selector, { clickCount: 1 });
  await frame.evaluate((fieldSelector) => {
    document.querySelector(fieldSelector).value = '';
  }, selector);

  // Nothing is typed for an empty value; the events below still fire.
  if (value) {
    await frame.type(selector, value, { delay: 60 });
  }

  // Fire the events explicitly, then blur.
  await frame.evaluate((fieldSelector) => {
    const field = document.querySelector(fieldSelector);
    for (const eventName of ['input', 'change']) {
      field.dispatchEvent(new Event(eventName, { bubbles: true }));
    }
    field.blur();
  }, selector);
}
/**
 * Waits until the <select> matching `selectSel` is visible and holds at least
 * `minOptions` options.
 *
 * The option count is checked through `frame.waitForFunction` with
 * `polling: 'mutation'` and a 30000 ms timeout.
 *
 * @param {object} frame - Puppeteer frame containing the select.
 * @param {string} selectSel - CSS selector of the select element.
 * @param {number} [minOptions=2] - Minimum number of options required.
 * @returns {Promise<void>}
 */
async function waitForOptions(frame, selectSel, minOptions = 2) {
  await frame.waitForSelector(selectSel, { visible: true });

  // Evaluated in the page, so it may only use its own arguments and the DOM.
  const hasEnoughOptions = (sel, min) => {
    const el = document.querySelector(sel);
    if (!el) return false;
    return el.options && el.options.length >= min;
  };
  const pollOptions = { polling: 'mutation', timeout: 30000 };

  await frame.waitForFunction(hasEnoughOptions, pollOptions, selectSel, minOptions);
}
/**
 * Selects an entry of the <select> matching `selectSel` inside `frame` and
 * dispatches bubbling `input` and `change` events on it.
 *
 * Option choice:
 *   1. the first option whose text contains `visibleText` (case-insensitive);
 *   2. otherwise — including when `visibleText` is empty — the first option
 *      that is not disabled and has a non-blank value.
 *
 * Previously, when neither rule produced an option, the function returned
 * without selecting anything and without reporting it. It now throws so the
 * cause is reported where it happens.
 *
 * @param {object} frame - Puppeteer frame containing the select.
 * @param {string} selectSel - CSS selector of the select element.
 * @param {string} [visibleText] - Text to look for in the option labels.
 * @returns {Promise<string>} Trimmed text of the option that was selected.
 * @throws {Error} If no option matches and no selectable option exists.
 */
async function selectAddress(frame, selectSel, visibleText) {
  const selectedText = await frame.evaluate(
    (sel, text) => {
      const select = document.querySelector(sel);
      const opts = Array.from(select.options);
      let target =
        text &&
        opts.find((o) => (o.textContent || '').toLowerCase().includes(text.toLowerCase()));
      if (!target) {
        // No text match: fall back to the first real, enabled entry.
        target = opts.find((o) => !o.disabled && o.value && o.value.trim() !== '');
      }
      // Report "nothing chosen" to the caller instead of silently returning.
      if (!target) return null;
      select.value = target.value;
      select.dispatchEvent(new Event('input', { bubbles: true }));
      select.dispatchEvent(new Event('change', { bubbles: true }));
      return (target.textContent || '').trim();
    },
    selectSel,
    visibleText || ''
  );

  if (selectedText == null) {
    throw new Error(`No selectable address option found in ${selectSel}`);
  }
  return selectedText;
}
/**
 * Clicks the "next" button located inside `frame`.
 *
 * Before clicking, a Tab key press is sent through the page keyboard and the
 * function pauses for 500 ms so blur/validation can settle.
 *
 * @param {object} page - Puppeteer page (used for the keyboard).
 * @param {object} frame - Puppeteer frame containing the button.
 * @param {string} nextSelector - CSS selector of the button.
 * @returns {Promise<void>}
 */
async function clickNext(page, frame, nextSelector) {
  const settleDelayMs = 500;

  // The button must be present and visible before anything else happens.
  await frame.waitForSelector(nextSelector, { visible: true });

  // Tab is sent via the page keyboard, then we give validation time to settle.
  await page.keyboard.press('Tab');
  await sleep(settleDelayMs);

  // Single click on the button inside the frame.
  await frame.click(nextSelector, { clickCount: 1 });
}
/**
 * Polls every frame of `page` until one of them contains an element matching
 * `selector`, and returns that frame.
 *
 * Changes from the earlier version:
 *   - An error thrown while querying a single frame no longer aborts the
 *     search. The frame is skipped and retried on the next pass. This function
 *     runs right after a click that loads new content, so a frame may be
 *     detached or mid-navigation when it is queried.
 *   - The frames are always scanned at least once, even when `timeoutMs` is 0.
 *   - On timeout, the last per-frame error (if any) is attached as `cause`.
 *
 * @param {object} page - Puppeteer page.
 * @param {string} selector - CSS selector to look for.
 * @param {number} [timeoutMs=30000] - Maximum time to keep polling.
 * @returns {Promise<object>} The first frame containing a match.
 * @throws {Error} If no frame contains a match before the timeout elapses.
 */
async function findFrameWithSelector(page, selector, timeoutMs = 30000) {
  const pollIntervalMs = 200;
  const deadline = Date.now() + timeoutMs;
  let lastError;

  for (;;) {
    for (const candidate of page.frames()) {
      try {
        const handle = await candidate.$(selector);
        if (handle) {
          // Only the frame is needed; release the element handle.
          await handle.dispose();
          return candidate;
        }
      } catch (err) {
        // Skip this frame for now but keep the error for the timeout report.
        lastError = err;
      }
    }
    if (Date.now() >= deadline) break;
    await sleep(pollIntervalMs);
  }

  throw new Error(
    `Timed out after ${timeoutMs}ms waiting for selector in any frame: ${selector}`,
    lastError ? { cause: lastError } : undefined
  );
}
/**
 * Reads the calendar rows matching `rowSelector` inside `frame`.
 *
 * For every row, the `td.value` cells at index 1 and 2 are inspected and the
 * trimmed text of each cell's `span:last-child` is taken. A missing cell or
 * span yields an empty string.
 *
 * @param {object} frame - Puppeteer frame containing the table.
 * @param {string} rowSelector - CSS selector matching the table rows.
 * @returns {Promise<Array<{'Bin type': string, 'Next collection date': string}>>}
 */
async function scrapeCalendarRows(frame, rowSelector) {
  await frame.waitForSelector(rowSelector, { visible: true });

  // Evaluated in the page, so the helper has to live inside the function.
  const extractRows = (sel) => {
    const lastSpanText = (cell) =>
      (cell?.querySelector('span:last-child')?.textContent || '').trim();

    return Array.from(document.querySelectorAll(sel), (tr) => {
      const cells = tr.querySelectorAll('td.value');
      return {
        'Bin type': lastSpanText(cells[1]),
        'Next collection date': lastSpanText(cells[2]),
      };
    });
  };

  return frame.evaluate(extractRows, rowSelector);
}
/**
 * Returns a promise that resolves (with no value) once a `setTimeout` of
 * `ms` milliseconds fires.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
// -----------------
// 1) Entry + opt out of login + accept
await page.goto(
'https://my.threerivers.gov.uk/AchieveForms/?mode=fill&form_uri=sandbox-publish://AF-Process-52df96e3-992a-4b39-bba3-06cfaabcb42b/AF-Stage-01ee28aa-1584-442c-8d1f-119b6e27114a/definition.json',
{ waitUntil: 'networkidle2' }
);
await clickAndMaybeNavigate(page, SELECTORS.anonBtn);
await clickAndMaybeNavigate(page, SELECTORS.acceptBtn);
// 2) Work inside the form iframe
const frame = await getFrameBySelector(page, SELECTORS.formIframe);
// 3) Postcode → wait for address options → select address
await typeWithChange(frame, SELECTORS.postcode, POSTCODE);
await page.keyboard.press('Tab');
await sleep(500);
await waitForOptions(frame, SELECTORS.addressSelect, 2);
await selectAddress(frame, SELECTORS.addressSelect, ADDRESS_TEXT);
await page.keyboard.press('Tab');
await sleep(500);
// 4) Next stage
await clickNext(page, frame, SELECTORS.nextBtn);
// 5) Find the table (any frame), then scrape it
const tableFrame = await findFrameWithSelector(page, SELECTORS.calendarRows, 30000);
const rows = await scrapeCalendarRows(tableFrame, SELECTORS.calendarRows);
// Return JSON array of objects
console.log(JSON.stringify(rows, null, 2));
return rows;
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment