Created
May 9, 2025 00:43
-
-
Save rachtsingh/e3d2e2b495d631b736d24b56ef1b2373 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use clap::Parser; | |
use flate2::Compression as GzipCompression; | |
use flate2::write::GzEncoder; | |
use humansize::{BINARY, format_size}; | |
use lz4::EncoderBuilder as Lz4EncoderBuilder; | |
use std::fs::File; | |
use std::io::{self, Read, Write}; | |
use std::path::PathBuf; | |
use std::time::Instant; | |
use tempfile::tempdir; | |
use xz2::write::XzEncoder; | |
#[derive(Parser)] | |
#[command( | |
author, | |
version, | |
about = "Benchmarks file compressibility with various algorithms" | |
)] | |
struct Args { | |
/// Path to the file to benchmark | |
#[arg(required = true)] | |
file_path: PathBuf, | |
/// Compression levels to test | |
#[arg(short, long, num_args = 1.., default_values_t = [1, 3, 7, 10, 15])] | |
levels: Vec<i32>, | |
/// Compression algorithms to test | |
#[arg(short, long, num_args = 1.., default_values = ["zstd", "gzip", "lz4", "xz"])] | |
algorithms: Vec<String>, | |
} | |
fn main() -> io::Result<()> { | |
let args = Args::parse(); | |
// Read input file | |
let mut input_file = File::open(&args.file_path)?; | |
let mut input_data = Vec::new(); | |
input_file.read_to_end(&mut input_data)?; | |
let input_size = input_data.len(); | |
println!("File: {}", args.file_path.display()); | |
println!("Original size: {}", format_size(input_size, BINARY)); | |
println!("\nCompression benchmark results:"); | |
println!( | |
"{:<10} {:<7} {:<15} {:<15} {:<15}", | |
"Algorithm", "Level", "Size", "Ratio", "Time" | |
); | |
println!("{}", "-".repeat(62)); | |
// Create a temporary directory for compressed files | |
let temp_dir = tempdir()?; | |
for algorithm in &args.algorithms { | |
for level in &args.levels { | |
let filename = format!("compressed_{}_{}.dat", algorithm, level); | |
let output_path = temp_dir.path().join(&filename); | |
let mut output_file = File::create(&output_path)?; | |
// Measure compression time | |
let start = Instant::now(); | |
match algorithm.as_str() { | |
"zstd" => { | |
zstd::stream::copy_encode(&input_data[..], &mut output_file, *level)?; | |
} | |
"gzip" => { | |
// Convert level to gzip compression level (0-9) | |
let gz_level = match level { | |
1..=9 => *level as u32, | |
_ => { | |
if *level < 1 { | |
1 | |
} else { | |
9 | |
} | |
} | |
}; | |
let mut encoder = GzEncoder::new(output_file, GzipCompression::new(gz_level)); | |
encoder.write_all(&input_data)?; | |
encoder.finish()?; | |
} | |
"lz4" => { | |
// Convert level to lz4 compression level (1-16) | |
let lz4_level = match level { | |
1..=16 => *level as u32, | |
_ => { | |
if *level < 1 { | |
1 | |
} else { | |
16 | |
} | |
} | |
}; | |
let mut encoder = Lz4EncoderBuilder::new() | |
.level(lz4_level) | |
.build(output_file)?; | |
encoder.write_all(&input_data)?; | |
let (_, result) = encoder.finish(); | |
result?; | |
} | |
"xz" => { | |
// Convert level to xz compression level (0-9) | |
let xz_level = match level { | |
0..=9 => *level as u32, | |
_ => { | |
if *level < 0 { | |
0 | |
} else { | |
9 | |
} | |
} | |
}; | |
let mut encoder = XzEncoder::new(output_file, xz_level); | |
encoder.write_all(&input_data)?; | |
encoder.finish()?; | |
} | |
_ => { | |
println!("Unknown algorithm: {}", algorithm); | |
continue; | |
} | |
}; | |
let duration = start.elapsed(); | |
// Get compressed file size | |
let compressed_size = output_path.metadata()?.len() as usize; | |
let ratio = input_size as f64 / compressed_size as f64; | |
println!( | |
"{:<10} {:<7} {:<15} {:<15.2} {:<15.2?}", | |
algorithm, | |
level, | |
format_size(compressed_size, BINARY), | |
ratio, | |
duration | |
); | |
} | |
} | |
Ok(()) | |
} | |
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs::File;
    use std::io::Write;
    use std::path::Path;
    use tempfile::NamedTempFile;

    /// Algorithms exercised by every test in this module.
    const ALGORITHMS: [&str; 4] = ["zstd", "gzip", "lz4", "xz"];

    /// Writes highly repetitive sample text to a temporary file and reads it
    /// back, so the tests start from file contents just like the binary does.
    fn load_sample_input() -> io::Result<Vec<u8>> {
        let mut sample_file = NamedTempFile::new()?;
        sample_file.write_all("test data ".repeat(1000).as_bytes())?;

        let mut contents = Vec::new();
        File::open(sample_file.path())?.read_to_end(&mut contents)?;
        Ok(contents)
    }

    /// Compresses `data` into a new file at `path` with the named algorithm
    /// and returns the size of the resulting file in bytes.
    ///
    /// Levels are clamped to gzip 1-9, lz4 1-16 and xz 0-9; zstd receives the
    /// level unchanged. Panics on an unknown algorithm name.
    fn compressed_size(
        algorithm: &str,
        level: i32,
        data: &[u8],
        path: &Path,
    ) -> io::Result<usize> {
        let mut output_file = File::create(path)?;
        match algorithm {
            "zstd" => zstd::stream::copy_encode(data, &mut output_file, level)?,
            "gzip" => {
                // Non-negative after clamping, so `unsigned_abs` is lossless.
                let gz_level = level.clamp(1, 9).unsigned_abs();
                let mut encoder = GzEncoder::new(output_file, GzipCompression::new(gz_level));
                encoder.write_all(data)?;
                encoder.finish()?;
            }
            "lz4" => {
                let lz4_level = level.clamp(1, 16).unsigned_abs();
                let mut encoder = Lz4EncoderBuilder::new()
                    .level(lz4_level)
                    .build(output_file)?;
                encoder.write_all(data)?;
                let (_, result) = encoder.finish();
                result?;
            }
            "xz" => {
                let xz_level = level.clamp(0, 9).unsigned_abs();
                let mut encoder = XzEncoder::new(output_file, xz_level);
                encoder.write_all(data)?;
                encoder.finish()?;
            }
            _ => panic!("Unknown algorithm: {}", algorithm),
        }
        Ok(path.metadata()?.len() as usize)
    }

    #[test]
    fn test_compression_levels() -> io::Result<()> {
        let input_data = load_sample_input()?;
        let input_size = input_data.len();

        // Create a temporary directory for compressed files
        let temp_dir = tempdir()?;

        for &algorithm in &ALGORITHMS {
            let size_at = |level: i32| {
                let output_path = temp_dir
                    .path()
                    .join(format!("test_{}_{}.dat", algorithm, level));
                compressed_size(algorithm, level, &input_data, &output_path)
            };

            // Test low and high compression levels
            let low_level_size = size_at(1)?;
            let high_level_size = size_at(15)?;

            // Higher compression level should result in smaller file
            assert!(
                high_level_size <= low_level_size,
                "{}: Higher compression level should produce smaller file",
                algorithm
            );

            // Compressed size should be smaller than original
            assert!(
                low_level_size < input_size,
                "{}: Compressed file should be smaller than original",
                algorithm
            );
        }

        Ok(())
    }

    #[test]
    fn test_different_algorithms() -> io::Result<()> {
        let input_data = load_sample_input()?;
        let input_size = input_data.len();

        // Create a temporary directory for compressed files
        let temp_dir = tempdir()?;

        // Test all algorithms at a medium compression level
        let level = 5;

        for &algorithm in &ALGORITHMS {
            let output_path = temp_dir.path().join(format!("algo_test_{}.dat", algorithm));
            let size = compressed_size(algorithm, level, &input_data, &output_path)?;

            // Verify each algorithm produces a smaller file than the original
            assert!(
                size < input_size,
                "{}: Compressed file should be smaller than original",
                algorithm
            );
        }

        Ok(())
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment