Embedding GPT-2 in Godot via Rust
The GDNative side lives in lib.rs: a ChatBot node that forwards user text to a background inference thread over a channel and blocks on the reply.
mod ml_thread;

use gdnative::prelude::{godot_print, methods, NativeClass, Node as GDNode, InitHandle, godot_init};
use ml_thread::start_language_model_thread;
use std::sync::mpsc::{Receiver, Sender};

const MAX_INPUT_LENGTH: usize = 512;
const BATCH_SIZE: usize = 1;

// Holds the channels that connect this node to the inference worker thread.
#[derive(NativeClass)]
#[inherit(GDNode)]
pub struct ChatBot {
    message_tx: Sender<(String, bool)>,
    response_rx: Receiver<String>,
}

// Only one impl block can have #[methods].
#[methods]
impl ChatBot {
    /// The "constructor" of the class. Spawns the inference thread.
    pub fn new(_base: &GDNode) -> Self {
        let (tx, rx) = start_language_model_thread();
        ChatBot {
            message_tx: tx,
            response_rx: rx,
        }
    }

    /// Sends the prompt to the worker thread and blocks until a reply arrives.
    pub fn make_reply(&self, text: &str, maxent: bool) -> String {
        // If maxent is set, the worker picks the most likely token.
        // Otherwise it samples probabilistically. No beam search yet.
        self.message_tx.send((text.to_string(), maxent)).expect("Child runner crashed.");
        // recv() returns a Result, not an Option; an Err means the worker hung up.
        if let Ok(msg) = self.response_rx.recv() {
            return msg;
        }
        "".into()
    }

    #[method]
    fn _ready(&self, #[base] base: &GDNode) {
        // The `godot_print!` macro works like `println!` but prints to the Godot-editor output tab as well.
        godot_print!("Hello world from node {}!", base.to_string());
    }

    #[method]
    fn process_user_query(&self, #[base] _base: &GDNode, user_str: String) -> String {
        // We could use GodotString, but it has different performance characteristics. Let's try String!
        //godot_print!("Got a call to the process_user_query endpoint with {}", &user_str);
        self.make_reply(&user_str, false)
    }
}

// Function that registers all exposed classes with Godot.
fn init(handle: InitHandle) {
    handle.add_class::<ChatBot>();
}

// Macro that creates the entry points of the dynamic library. The crate must be
// built as a cdylib (crate-type = ["cdylib"] in Cargo.toml) for Godot to load it.
godot_init!(init);

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn it_works() {
        // ChatBot::new needs a live Godot node, so drive the worker thread directly.
        let (tx, rx) = start_language_model_thread();
        tx.send(("I give you one yike:".to_string(), true)).expect("Worker thread hung up.");
        let result = rx.recv().expect("No reply from worker thread.");
        println!("{result}");
    }
}
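One caveat with the code above: make_reply blocks until inference finishes, which would stall Godot's main loop for however long the model takes. A minimal non-blocking sketch, assuming the same response_rx field; poll_reply is a hypothetical helper, not part of the gist:

use std::sync::mpsc::TryRecvError;

// Hypothetical second impl block (only one impl block may carry #[methods],
// but a plain impl block is fine): call poll_reply from `_process` each frame
// instead of blocking inside make_reply.
impl ChatBot {
    pub fn poll_reply(&self) -> Option<String> {
        match self.response_rx.try_recv() {
            Ok(msg) => Some(msg),                    // A finished completion was waiting.
            Err(TryRecvError::Empty) => None,        // Still generating; check again next frame.
            Err(TryRecvError::Disconnected) => None, // Worker thread died; nothing will arrive.
        }
    }
}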
The worker thread lives in ml_thread.rs: it owns the tokenizer and the embedded ONNX model, and services one request per message on the channel.
use std::io::Cursor;
use std::str::FromStr;
use std::sync::mpsc::{channel, Receiver, Sender};
use std::thread;
use tokenizers::tokenizer::Tokenizer;
use tract_onnx::prelude::*;
use rand::{Rng, thread_rng};
use crate::{BATCH_SIZE, MAX_INPUT_LENGTH};

fn run_language_model(input_channel: Receiver<(String, bool)>, result_channel: Sender<String>) {
    let tokenizer: Tokenizer = Tokenizer::from_str(include_str!("../model/tokenizer_gpt2.json")).expect("Failed to load packed tokenizer. Library may be corrupt.");

    // A little info on GPT-2:
    // input1 - type: int64[input1_dynamic_axes_1,input1_dynamic_axes_2,input1_dynamic_axes_3]
    // output1 - type: float32[input1_dynamic_axes_1,input1_dynamic_axes_2,input1_dynamic_axes_3,50257]
    // i.e. one 50257-dim logit vector per input position; we sample from the last real position.
    let mut model_buf = Cursor::new(include_bytes!("../model/gpt-neo-2.onnx"));
    let model = tract_onnx::onnx()
        .model_for_read(&mut model_buf).expect("Unable to read from model built into binary. This indicates corruption.")
        // Alternative: load from disk with .model_for_path("model/model.onnx") instead of embedding.
        .with_input_fact(0, i64::fact(&[BATCH_SIZE, MAX_INPUT_LENGTH]).into()).expect("Defining input fact size on preloaded language model.")
        .with_input_fact(1, i64::fact(&[BATCH_SIZE, MAX_INPUT_LENGTH]).into()).expect("Defining input mask size on preloaded language model.")
        //.into_optimized().expect("Converting packaged model to optimized build failed.")
        .into_runnable().expect("Converting optimized model into runnable model failed.");

    loop {
        match input_channel.recv() {
            Ok((msg, maxent)) => {
                let tokenizer_output = tokenizer.encode(msg, true).expect("Unable to encode input string.");
                let token_ids = tokenizer_output.get_ids();
                // Attention mask: 1 over the real tokens, 0 over the padding.
                let mask: Tensor = tract_ndarray::Array2::from_shape_fn((1, MAX_INPUT_LENGTH), |idx| if idx.1 < token_ids.len() { 1i64 } else { 0i64 }).into();
                // Token tensor: the encoded ids, right-padded with zeros to the fixed input length.
                let token_tensor: Tensor = tract_ndarray::Array2::from_shape_fn((1, MAX_INPUT_LENGTH), |idx| if idx.1 < token_ids.len() { token_ids[idx.1] as i64 } else { 0i64 }).into();
                let outputs = model.run(tvec!(token_tensor, mask)).expect("Failed to run model on token tensor.");
                let logits = outputs[0].to_array_view::<f32>().expect("Unable to convert tensor output to f32 array.");
                // Take the logit vector at the last real token position: shape [50257].
                let last_position = token_ids.len().saturating_sub(1);
                let next_token_logits = logits.slice(tract_ndarray::s![0, last_position, ..]);
                let word_id = if maxent {
                    // Greedy: pick the single most likely token.
                    next_token_logits.iter().zip(0u32..).max_by(|a, b| a.0.partial_cmp(b.0).unwrap()).unwrap().1
                } else {
                    // Sample from the softmax distribution via roulette-wheel selection.
                    let max_logit = next_token_logits.iter().cloned().fold(f32::NEG_INFINITY, f32::max);
                    let probs: Vec<f32> = next_token_logits.iter().map(|&l| (l - max_logit).exp()).collect();
                    let total: f32 = probs.iter().sum();
                    let mut rng = thread_rng();
                    let mut energy = rng.gen::<f32>() * total;
                    let mut selected_token = 0;
                    for (idx, p) in probs.iter().enumerate() {
                        if *p > energy {
                            selected_token = idx;
                            break;
                        }
                        energy -= *p;
                    }
                    selected_token as u32
                };
                let word = tokenizer.id_to_token(word_id).unwrap_or_else(|| " ".into());
                result_channel.send(word).expect("Failed to send result.");
            },
            Err(_) => {
                // The sender was dropped; shut the worker thread down.
                return;
            },
        }
    }
}

pub fn start_language_model_thread() -> (Sender<(String, bool)>, Receiver<String>) {
    let (user_input_tx, user_input_rx) = channel();
    let (ai_completion_tx, ai_completion_rx) = channel();
    thread::spawn(move || run_language_model(user_input_rx, ai_completion_tx));
    (user_input_tx, ai_completion_rx)
}
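As written, each request produces exactly one new token, so a full reply means calling the model repeatedly and feeding each sampled token back into the context. A minimal sketch of that loop, assuming it sits in ml_thread.rs next to the code above; sample_next_token is a hypothetical closure standing in for the tensor-building, model.run, and sampling code inside the match arm:

// Hypothetical autoregressive loop: not part of the gist, just an illustration.
// `sample_next_token` takes the current context ids and returns one sampled id.
fn generate(tokenizer: &Tokenizer, sample_next_token: impl Fn(&[u32]) -> u32, prompt: &str, max_new_tokens: usize) -> String {
    let encoding = tokenizer.encode(prompt, true).expect("Unable to encode input string.");
    let mut ids: Vec<u32> = encoding.get_ids().to_vec();
    let mut reply = String::new();
    for _ in 0..max_new_tokens {
        if ids.len() >= MAX_INPUT_LENGTH {
            break; // Never overflow the fixed 512-token input window.
        }
        let next = sample_next_token(&ids); // Re-runs the model on the padded context.
        ids.push(next);
        // Mirror the single-token path above: raw BPE token strings, so the output
        // keeps BPE markers; a real implementation would properly detokenize.
        reply.push_str(&tokenizer.id_to_token(next).unwrap_or_else(|| " ".into()));
    }
    reply
}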