Last active
June 30, 2022 04:24
-
-
Save Jared-Sprague/170176d12fee2506b833cc2076940138 to your computer and use it in GitHub Desktop.
Rust download 11000 files in < 1 min
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use futures::stream::StreamExt; | |
use reqwest::Client; | |
use serde::Deserialize; | |
use std::fs; | |
use std::io::prelude::*; | |
use std::path::Path; | |
use std::time::Duration; | |
use anyhow::Context; | |
use std::fs::{create_dir_all, File}; | |
#[derive(Deserialize, Clone)] | |
struct Asset { | |
src: String, | |
dest: String, | |
} | |
async fn _download_text(client: reqwest::Client, asset: Asset) -> anyhow::Result<String> { | |
let resp = client | |
.get(&asset.src) | |
.send() | |
.await | |
.with_context(|| format!("ERROR getting {}", asset.src))?; | |
let text = resp | |
.text() | |
.await | |
.with_context(|| format!("ERROR reading {}", asset.src))?; | |
Ok(text) | |
} | |
impl Asset { | |
async fn download(self, client: reqwest::Client) -> anyhow::Result<(usize, Asset)> { | |
let text = again::retry(|| _download_text(client.clone(), self.clone())) | |
.await | |
.with_context(|| format!("ERROR downloading text {}", self.src))?; | |
let path = Path::new(self.dest.as_str()); | |
if let Some(prefix) = path.parent() { | |
create_dir_all(prefix)?; | |
} | |
let mut file = File::create(&path)?; | |
file.write_all(text.as_bytes()) | |
.context("Error writing file contents")?; | |
Ok((text.len(), self.clone())) | |
} | |
} | |
#[tokio::main] | |
async fn main() -> Result<(), anyhow::Error> { | |
env_logger::init(); | |
println!("Downloading assets .."); | |
// Max number of concurrent download tasks | |
const MAX_CONNCURENT: usize = 50; | |
let assets: Vec<Asset> = | |
serde_json::from_str(&fs::read_to_string("./title_asset_manifest_simple.json")?)?; | |
let total = assets.len(); | |
// Client's documentation explicitly notes that it is efficiently clonable - it's a reference count increment and a pointer copy | |
let client = Client::builder() | |
.timeout(Duration::from_secs(15)) | |
.build() | |
.context("Failed to build reqwest client")?; | |
let mut fetches = futures::stream::iter( | |
assets | |
.into_iter() | |
.map(|asset| asset.download(client.clone())), | |
) | |
.buffer_unordered(MAX_CONNCURENT); | |
let mut successful_downloads = 0; | |
while let Some(fetch) = fetches.next().await { | |
match fetch { | |
Ok((bytes, asset)) => { | |
successful_downloads += 1; | |
println!( | |
"Downloaded {}/{} bytes {}", | |
successful_downloads, total, bytes | |
); | |
} | |
Err(e) => { | |
eprintln!("{}", e) | |
} | |
} | |
} | |
println!("Downloaded {} of {} files.", successful_downloads, total); | |
Ok(()) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment