Skip to content

Instantly share code, notes, and snippets.

@FCO
Created June 3, 2025 14:24
Show Gist options
  • Save FCO/8f49f503a4fcbe277672ab4a908b9e0c to your computer and use it in GitHub Desktop.
Save FCO/8f49f503a4fcbe277672ab4a908b9e0c to your computer and use it in GitHub Desktop.
use Cro::HTTP::Client;
use Cro::Uri;
use DOM::Tiny;
# Single HTTP client instance used by scrap() for every request in this script.
my Cro::HTTP::Client $client = Cro::HTTP::Client.new;
# Fetch one page and lazily produce the URI of every link found on it.
#
# $url may be a Str (it is then parsed with Cro::Uri.parse) or an already
# parsed URI object. Nothing is returned when $url is undefined or its scheme
# does not start with "http". Otherwise the result is a lazy `gather`
# sequence, so the request is only made once the caller starts iterating.
#
# A failure while fetching or parsing the page is reported on STDERR and ends
# the sequence for that page; it does not propagate to the caller.
sub scrap($url is copy) {
    $url = Cro::Uri.parse: $url if $url ~~ Str;
    return unless $url.defined && $url.scheme.starts-with: "http";
    gather {
        # Deliberately no surrounding `try`: it would swallow every exception
        # before this handler could see it, hiding all failures.
        CATCH {
            default { note "failed: $url => ", .message }
        }
        my $resp = await $client.get: $url.Str;
        say "got: $url => ", $resp.status;
        # Default to an empty string so a response without a Content-type
        # header is simply skipped instead of failing on an undefined value.
        my $type = $resp.header("Content-type") // "";
        if $type.starts-with: "text/html" {
            for DOM::Tiny.parse(await $resp.body).find("a") -> $link {
                # Anchors without an href attribute have nothing to follow.
                with $link<href> -> $href {
                    # Presumably resolves $href against the page URL. A bad
                    # href must only skip this link, not the rest of the page.
                    my $link-url = try { $url.add($href) };
                    .take with $link-url;
                }
            }
        }
    }
}
# Entry point: crawl outward from the URLs given on the command line.
#
# Every URL pushed into the supplier is de-duplicated by its string form.
# Each URL that gets through is scraped inside its own `start` block, and the
# links that scrap() yields are pushed back into the same supplier.
#
# NOTE(review): nothing here calls `done` or closes the supplier, so the
# react block appears to keep running until the process is interrupted --
# confirm that this is intended.
sub MAIN(
*@urls where * >= 1 #= Urls to start scrapping
) {
    my Supplier $supplier = Supplier.new;
    my Supply $supply = $supplier.Supply.unique(as => *.Str);
    react {
        whenever $supply -> Cro::Uri $url {
            start {
                for scrap($url) -> $found {
                    # scrap() may hand back an undefined value; skip those.
                    $supplier.emit($found) with $found;
                }
            }
        }
        # Seed the crawl with the command-line URLs.
        for @urls -> $seed {
            $supplier.emit: Cro::Uri.parse($seed);
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment