Skip to content

Instantly share code, notes, and snippets.

@rachtsingh
Created May 9, 2025 00:43
Show Gist options
  • Save rachtsingh/e3d2e2b495d631b736d24b56ef1b2373 to your computer and use it in GitHub Desktop.
Save rachtsingh/e3d2e2b495d631b736d24b56ef1b2373 to your computer and use it in GitHub Desktop.
use clap::Parser;
use flate2::Compression as GzipCompression;
use flate2::write::GzEncoder;
use humansize::{BINARY, format_size};
use lz4::EncoderBuilder as Lz4EncoderBuilder;
use std::fs::File;
use std::io::{self, Read, Write};
use std::path::PathBuf;
use std::time::Instant;
use tempfile::tempdir;
use xz2::write::XzEncoder;
// Command-line options for the benchmark; `main` obtains them via `Args::parse()`.
// NOTE(review): the explanatory notes in this struct use `//` on purpose. clap's
// derive turns `///` comments into user-visible help text, so the existing `///`
// lines are part of the program's output and are left exactly as written.
#[derive(Parser)]
#[command(
author,
version,
about = "Benchmarks file compressibility with various algorithms"
)]
struct Args {
// `main` opens this path and reads the whole file into memory before any
// compression is run.
/// Path to the file to benchmark
#[arg(required = true)]
file_path: PathBuf,
// `main` runs every selected algorithm once per level listed here. The zstd
// branch receives the value unchanged; the gzip, lz4 and xz branches clamp it
// into their own range first, so several requested levels can end up running
// the same effective level for those algorithms.
/// Compression levels to test
#[arg(short, long, num_args = 1.., default_values_t = [1, 3, 7, 10, 15])]
levels: Vec<i32>,
// Names are compared literally in `main` against "zstd", "gzip", "lz4" and
// "xz"; any other name is reported as "Unknown algorithm" and skipped.
/// Compression algorithms to test
#[arg(short, long, num_args = 1.., default_values = ["zstd", "gzip", "lz4", "xz"])]
algorithms: Vec<String>,
}
/// Compression algorithms this benchmark knows how to run.
#[derive(Clone, Copy)]
enum Codec {
    Zstd,
    Gzip,
    Lz4,
    Xz,
}

impl Codec {
    /// Resolves an algorithm name given on the command line.
    ///
    /// Returns `None` for any name other than `zstd`, `gzip`, `lz4` or `xz`.
    fn from_name(name: &str) -> Option<Self> {
        match name {
            "zstd" => Some(Self::Zstd),
            "gzip" => Some(Self::Gzip),
            "lz4" => Some(Self::Lz4),
            "xz" => Some(Self::Xz),
            _ => None,
        }
    }

    /// Compresses `data` into `sink` using this algorithm at the requested `level`.
    ///
    /// zstd receives `level` unchanged. For the other encoders the level is
    /// clamped into the range this tool uses for them (gzip 1-9, lz4 1-16,
    /// xz 0-9); every clamp range is non-negative, so the `as u32` casts below
    /// cannot wrap.
    ///
    /// # Errors
    ///
    /// Returns any I/O error reported while encoding or finishing the stream.
    fn compress(self, data: &[u8], level: i32, mut sink: File) -> io::Result<()> {
        match self {
            Self::Zstd => zstd::stream::copy_encode(data, &mut sink, level)?,
            Self::Gzip => {
                let gz_level = level.clamp(1, 9) as u32;
                let mut encoder = GzEncoder::new(sink, GzipCompression::new(gz_level));
                encoder.write_all(data)?;
                encoder.finish()?;
            }
            Self::Lz4 => {
                let lz4_level = level.clamp(1, 16) as u32;
                let mut encoder = Lz4EncoderBuilder::new().level(lz4_level).build(sink)?;
                encoder.write_all(data)?;
                // `finish` hands back the writer together with the final status.
                let (_, result) = encoder.finish();
                result?;
            }
            Self::Xz => {
                let xz_level = level.clamp(0, 9) as u32;
                let mut encoder = XzEncoder::new(sink, xz_level);
                encoder.write_all(data)?;
                encoder.finish()?;
            }
        }
        Ok(())
    }
}

/// Benchmarks how well the input file compresses with each selected algorithm
/// at each selected level, printing one table row per combination.
///
/// The "Level" column shows the level as requested on the command line; for
/// gzip, lz4 and xz the encoder may run at a clamped level (see
/// `Codec::compress`).
///
/// # Errors
///
/// Returns the first I/O error hit while reading the input, creating the
/// temporary output files, or compressing.
fn main() -> io::Result<()> {
    let args = Args::parse();

    // Read the whole input up front so that file reading is not part of the
    // timed section below.
    let mut input_file = File::open(&args.file_path)?;
    let mut input_data = Vec::new();
    input_file.read_to_end(&mut input_data)?;
    let input_size = input_data.len();

    println!("File: {}", args.file_path.display());
    println!("Original size: {}", format_size(input_size, BINARY));
    println!("\nCompression benchmark results:");
    println!(
        "{:<10} {:<7} {:<15} {:<15} {:<15}",
        "Algorithm", "Level", "Size", "Ratio", "Time"
    );
    println!("{}", "-".repeat(62));

    // Compressed outputs go into a temporary directory that is removed when
    // `temp_dir` is dropped at the end of `main`.
    let temp_dir = tempdir()?;

    for algorithm in &args.algorithms {
        // Resolve the name once per algorithm, before any file is created or
        // timer started, so an unknown name leaves no stray empty output file
        // and is reported a single time rather than once per level.
        let codec = match Codec::from_name(algorithm) {
            Some(codec) => codec,
            None => {
                println!("Unknown algorithm: {}", algorithm);
                continue;
            }
        };

        for &level in &args.levels {
            let filename = format!("compressed_{}_{}.dat", algorithm, level);
            let output_path = temp_dir.path().join(&filename);
            let output_file = File::create(&output_path)?;

            // Time encoder setup, compression and the writes to the output file.
            let start = Instant::now();
            codec.compress(&input_data, level, output_file)?;
            let duration = start.elapsed();

            // The size on disk is what the encoder actually produced; keep it
            // as `u64` rather than narrowing to `usize`.
            let compressed_size = output_path.metadata()?.len();
            let ratio = input_size as f64 / compressed_size as f64;
            println!(
                "{:<10} {:<7} {:<15} {:<15.2} {:<15.2?}",
                algorithm,
                level,
                format_size(compressed_size, BINARY),
                ratio,
                duration
            );
        }
    }

    Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs::File;
    use std::io::Write;
    use std::path::Path;
    use tempfile::NamedTempFile;

    /// Algorithms exercised by every test in this module.
    const ALGORITHMS: [&str; 4] = ["zstd", "gzip", "lz4", "xz"];

    /// Writes highly repetitive text to a temporary file and reads it back,
    /// mirroring how `main` loads its input from disk.
    fn sample_input() -> io::Result<Vec<u8>> {
        let mut test_file = NamedTempFile::new()?;
        let test_data = "test data ".repeat(1000);
        test_file.write_all(test_data.as_bytes())?;

        let mut input_file = File::open(test_file.path())?;
        let mut input_data = Vec::new();
        input_file.read_to_end(&mut input_data)?;
        Ok(input_data)
    }

    /// Compresses `data` with `algorithm` at `level` into a new file at `path`
    /// and returns the size of that file in bytes.
    ///
    /// Levels are clamped per algorithm (gzip 1-9, lz4 1-16, xz 0-9) and zstd
    /// takes the level as given. Every clamp range is non-negative, so the
    /// `as u32` casts cannot wrap.
    ///
    /// # Panics
    ///
    /// Panics if `algorithm` is not one of the names in `ALGORITHMS`.
    fn compressed_size(
        algorithm: &str,
        level: i32,
        data: &[u8],
        path: &Path,
    ) -> io::Result<usize> {
        let mut output_file = File::create(path)?;
        match algorithm {
            "zstd" => zstd::stream::copy_encode(data, &mut output_file, level)?,
            "gzip" => {
                let gz_level = level.clamp(1, 9) as u32;
                let mut encoder = GzEncoder::new(output_file, GzipCompression::new(gz_level));
                encoder.write_all(data)?;
                encoder.finish()?;
            }
            "lz4" => {
                let lz4_level = level.clamp(1, 16) as u32;
                let mut encoder = Lz4EncoderBuilder::new()
                    .level(lz4_level)
                    .build(output_file)?;
                encoder.write_all(data)?;
                let (_, result) = encoder.finish();
                result?;
            }
            "xz" => {
                let xz_level = level.clamp(0, 9) as u32;
                let mut encoder = XzEncoder::new(output_file, xz_level);
                encoder.write_all(data)?;
                encoder.finish()?;
            }
            _ => panic!("Unknown algorithm: {}", algorithm),
        }
        Ok(path.metadata()?.len() as usize)
    }

    /// For every algorithm, a high requested level must not produce a larger
    /// file than a low one, and the low level must already beat the input size.
    #[test]
    fn test_compression_levels() -> io::Result<()> {
        let input_data = sample_input()?;
        let input_size = input_data.len();
        let temp_dir = tempdir()?;
        // One low and one high requested level.
        let levels = [1, 15];

        for &algorithm in &ALGORITHMS {
            let mut sizes = Vec::with_capacity(levels.len());
            for &level in &levels {
                let output_path = temp_dir
                    .path()
                    .join(format!("test_{}_{}.dat", algorithm, level));
                sizes.push(compressed_size(algorithm, level, &input_data, &output_path)?);
            }

            // Higher compression level should result in smaller file
            assert!(
                sizes[1] <= sizes[0],
                "{}: Higher compression level should produce smaller file",
                algorithm
            );
            // Compressed size should be smaller than original
            assert!(
                sizes[0] < input_size,
                "{}: Compressed file should be smaller than original",
                algorithm
            );
        }
        Ok(())
    }

    /// Every algorithm, run at a medium level, must shrink the repetitive input.
    #[test]
    fn test_different_algorithms() -> io::Result<()> {
        let input_data = sample_input()?;
        let input_size = input_data.len();
        let temp_dir = tempdir()?;
        let level = 5;

        for &algorithm in &ALGORITHMS {
            let output_path = temp_dir
                .path()
                .join(format!("algo_test_{}.dat", algorithm));
            let size = compressed_size(algorithm, level, &input_data, &output_path)?;

            // Verify each algorithm produces a smaller file than the original
            assert!(
                size < input_size,
                "{}: Compressed file should be smaller than original",
                algorithm
            );
        }
        Ok(())
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment