Created
June 3, 2025 14:24
-
-
Save FCO/8f49f503a4fcbe277672ab4a908b9e0c to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use Cro::HTTP::Client; | |
use Cro::Uri; | |
use DOM::Tiny; | |
# One HTTP client instance, created once at file scope and used by scrap for every request.
my Cro::HTTP::Client $client .= new; | |
# Fetch one page and lazily yield the absolute URL of every link found on it.
#
# $url - a Str or a Cro::Uri; strings are parsed into a Cro::Uri first.
#
# Returns nothing when the URL is undefined or its scheme does not start with
# "http" (this includes scheme-less, relative references). Otherwise returns
# a lazy sequence of URLs: each <a href> of the fetched page resolved against
# $url. Pages whose Content-type is not text/html yield an empty sequence.
#
# Failures while fetching or parsing the page are reported on STDERR and end
# the sequence early; they are never propagated to the caller.
sub scrap($url is copy) {
    $url = Cro::Uri.parse: $url if $url ~~ Str;
    # A relative reference has no scheme; fall back to '' so the check is
    # simply false instead of throwing on an undefined value.
    return unless $url.defined && ($url.scheme // '').starts-with: "http";
    gather {
        # No surrounding `try` here: it would swallow the exception before
        # this handler could ever see it and the failure would go unnoticed.
        CATCH {
            default { note "scrap $url failed: ", .message }
        }
        my $resp = await $client.get: $url.Str;
        say "got: $url => ", $resp.status;
        # The header may be absent; treat that as "not HTML".
        my $type = $resp.header("Content-type") // '';
        if $type.starts-with: "text/html" {
            # Only anchors that actually carry an href can be followed.
            for DOM::Tiny.parse(await $resp.body).find: "a[href]" -> $link {
                # Resolve each link on its own so that one malformed href
                # skips that link only, not the rest of the page.
                my $link-url = try $url.add($link<href>);
                take $link-url with $link-url;
            }
        }
    }
}
# Crawl outward from the given seed URLs: every distinct URL is passed to
# scrap on a worker (`start`), and each link it yields is fed back into the
# same stream. The program exits once no URL is queued or being processed.
#
# @urls - one or more URLs to start from.
sub MAIN(
    *@urls where * >= 1 #= Urls to start scrapping
) {
    my Supplier $supplier .= new;

    # URLs (by string form) already handed to a worker. Only touched inside
    # the `whenever` body, which react runs one message at a time.
    my %seen is SetHash;

    # Number of emitted URLs whose handling has not finished yet. All seeds
    # are counted up front so the counter cannot reach zero while some seeds
    # are still waiting to be emitted.
    my atomicint $pending = @urls.elems;

    # Mark one emitted URL as fully handled; the last one closes the stream,
    # which lets the react block (and the program) finish.
    my sub finish-one() {
        $supplier.done if atomic-dec-fetch($pending) == 0;
    }

    react {
        whenever $supplier.Supply -> Cro::Uri $url {
            my $key = $url.Str;
            if %seen{$key} {
                # Duplicate: nothing to crawl, but it was counted on emit.
                finish-one();
            }
            else {
                %seen{$key} = True;
                start {
                    # Runs even if scrap dies, so the count stays balanced.
                    LEAVE finish-one();
                    for scrap $url -> $new {
                        next without $new;
                        # Count before emitting: this worker still holds its
                        # own count, so the total cannot hit zero in between.
                        atomic-inc-fetch($pending);
                        $supplier.emit: $new;
                    }
                }
            }
        }
        $supplier.emit: Cro::Uri.parse: $_ for @urls
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment