Skip to content

Instantly share code, notes, and snippets.

@a-agmon
Last active July 7, 2024 05:58
Show Gist options
  • Save a-agmon/6526f0990b1d3ee73f8ae1c5ffa80bda to your computer and use it in GitHub Desktop.
Save a-agmon/6526f0990b1d3ee73f8ae1c5ffa80bda to your computer and use it in GitHub Desktop.
/// Embeds every file in the input directory in parallel and streams the
/// results to a database writer task through a channel.
///
/// Steps:
/// 1. Create an `mpsc` channel of `EmbeddingEntry` values.
/// 2. Hand the receiving end to `init_writer_task` and keep the returned handle.
/// 3. List the input directory and process each file on Rayon's thread pool,
///    giving each call its own clone of the sender.
/// 4. Await the writer task handle before returning.
///
/// # Errors
/// Returns an error if the directory cannot be read, if a directory entry
/// cannot be read, or if awaiting the writer task fails. A failure while
/// processing an individual file is only logged, so one bad file does not
/// abort the whole run.
///
/// NOTE(review): `cli_args`, `db_uri` and the async runtime entry-point
/// attribute are not visible in this snippet; they are assumed to be provided
/// by the surrounding file.
async fn main() -> anyhow::Result<()> {
    // init the sync channel
    let (sender, receiver) = std::sync::mpsc::channel::<EmbeddingEntry>();
    // start the write task loop and get a handle to it
    let db_writer_task = init_writer_task(receiver, db_uri.as_str());

    // list the files in the directory to be embedded; directory entry errors
    // are propagated instead of panicking
    let mut filenames: Vec<String> = Vec::new();
    for entry in fs::read_dir(cli_args.input_directory)? {
        let path = entry?.path();
        match path.to_str() {
            Some(name) => filenames.push(name.to_string()),
            // a path that is not valid UTF-8 cannot be represented as a
            // `String`; skip it rather than crash the whole run
            None => warn!("Skipping non UTF-8 path: {}", path.display()),
        }
    }

    filenames
        // uses Rayon into_par_iter() to parallelize the calls
        .into_par_iter()
        // `for_each_with` takes ownership of the sender and gives the closure
        // a mutable reference to a per-worker copy, so nothing is moved out of
        // a closure that runs many times
        .for_each_with(sender, |sender, filename| {
            // read & embed the text file, then send to the channel; the name
            // is cloned because it is still needed for the warning below
            if let Err(e) = process_text_file(sender.clone(), filename.clone()) {
                warn!("Error processing file: {}: Error:{}", filename, e)
            }
        });
    // the original sender and all of its clones have been dropped by now, so
    // the channel is closed — presumably what lets the writer loop terminate

    // wait for the db writer task to finish before exiting
    db_writer_task.await?;
    Ok(())
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment