Skip to content

Instantly share code, notes, and snippets.

@rudiedirkx
Created May 6, 2025 20:04
Show Gist options
  • Save rudiedirkx/2bd18227db34ea6a30fe920f57c000da to your computer and use it in GitHub Desktop.
Save rudiedirkx/2bd18227db34ea6a30fe920f57c000da to your computer and use it in GitHub Desktop.
<?php
use GuzzleHttp\Client; // guzzlehttp/guzzle
use rdx\jsdom\Node; // rdx/js-dom
require 'vendor/autoload.php';
// HTTP client shared by the index download below and the detail-page requests further down.
$guzzle = new Client();

// The overview page is cached on disk so that re-running the script does not
// download it again.
$file = 'cache/padelgids.html';
if (file_exists($file)) {
    $html = file_get_contents($file);
}
else {
    $rsp = $guzzle->get('https://padelgids.nl/booking/knltb-meet-play/?type=&display=table');
    $html = (string) $rsp->getBody();

    // On a first run the cache directory does not exist yet; without it
    // file_put_contents() only emits a warning and nothing gets cached.
    if (!is_dir(dirname($file))) {
        mkdir(dirname($file), 0777, true);
    }
    file_put_contents($file, $html);
}
// Parse the overview page and collect the anchors selected by 'tr div + a';
// each one is expected to point at a detail page.
$dom = Node::create($html, 'utf-8');
$links = $dom->queryAll('tr div + a');
$total = count($links);
var_dump($total);

// Normalises a scraped text value for the tab-separated output: runs of
// whitespace (including tabs and newlines, which would otherwise break the
// row/column layout) collapse to a single space and the ends are trimmed.
// Falls back to the unmodified text when the regex fails (e.g. invalid UTF-8).
$clean = static function (?string $text): string {
    $text ??= '';
    return trim(preg_replace('#\s+#u', ' ', $text) ?? $text);
};

$done = 0;
// Number of pages actually downloaded in this run (cache hits excluded);
// used to throttle only between real HTTP requests.
$fetched = 0;

foreach ($links as $link) {
    // Drop the query string; only the path identifies the detail page.
    $path = explode('?', (string) $link['href'])[0];
    if ($path === '') {
        // An anchor without a usable href would resolve to the site root and
        // produce a junk row, so skip it.
        echo "(skipping link without href)\n";
        continue;
    }

    $url = 'https://padelgids.nl' . $path;
    echo $url, "...\n";

    // Turn the path into a flat, filesystem-safe cache file name.
    $file = preg_replace('#[^0-9a-z-]+#i', '-', $path);
    $file = preg_replace('#\-+#', '-', $file);
    $file = 'cache/' . trim($file, '-') . '.html';

    if (file_exists($file)) {
        $html = file_get_contents($file);
    }
    else {
        // Be polite to the server: pause 0.7-1.4 s between consecutive downloads.
        if ($fetched > 0) {
            usleep(rand(700, 1400) * 1000);
        }

        $rsp = $guzzle->get($url);
        $html = (string) $rsp->getBody();
        $fetched++;

        // Make sure the cache directory exists before writing into it.
        if (!is_dir(dirname($file))) {
            mkdir(dirname($file), 0777, true);
        }
        file_put_contents($file, $html);
    }

    $dom = Node::create($html, 'utf-8');

    // Any of these may be missing on a page; ?-> then yields null, which
    // $clean() turns into an empty column.
    $h1 = $dom->query('h1');
    $mail = $dom->query('.social-link a[href^="mailto:"]');
    $phone = $dom->query('.social-link a[href^="tel:"]');
    $city = $dom->query('.social-link br + a.text-dark');

    // One tab-separated row per page: name, city, mail, phone, (empty), url.
    // NOTE(review): rows are appended, so re-running the script without
    // removing output.csv first duplicates every row.
    $row = sprintf(
        "%s\t%s\t%s\t%s\t\t%s\n",
        $clean($h1?->textContent),
        $clean($city?->textContent),
        $clean($mail?->textContent),
        $clean($phone?->textContent),
        $url
    );
    file_put_contents('output.csv', $row, FILE_APPEND);

    $done++;
    if (($done % 10) === 0) {
        echo " $done / $total\n";
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment