@a-r-r-o-w
Created September 15, 2024 19:44
Conversion of CogVideoX to 4-bit (NF4) bitsandbytes weights. Based on [this](https://github.com/huggingface/diffusers/pull/9213) Diffusers PR.
import gc
import torch
from accelerate.utils import compute_module_sizes
from diffusers import BitsAndBytesConfig, CogVideoXPipeline, CogVideoXTransformer3DModel
from diffusers.utils import export_to_video
from transformers import T5EncoderModel
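
# Helpers to reset CUDA memory statistics and report current/peak usage (in GB)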
def reset_memory():
    gc.collect()
    torch.cuda.empty_cache()
    torch.cuda.reset_peak_memory_stats()
    torch.cuda.reset_accumulated_memory_stats()


def print_memory():
    memory = torch.cuda.memory_allocated() / 1024**3
    max_memory = torch.cuda.max_memory_allocated() / 1024**3
    max_reserved = torch.cuda.max_memory_reserved() / 1024**3
    print(f"{memory=:.2f}")
    print(f"{max_memory=:.2f}")
    print(f"{max_reserved=:.2f}")


reset_memory()

model_id = "THUDM/CogVideoX-5b"
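
# 4-bit NF4 quantization config shared by the transformer and the T5 text encoder; compute runs in bfloat16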
nf4_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)
transformer = CogVideoXTransformer3DModel.from_pretrained(
    model_id,
    subfolder="transformer",
    quantization_config=nf4_config,
    torch_dtype=torch.bfloat16,
)

text_encoder = T5EncoderModel.from_pretrained(
    model_id,
    subfolder="text_encoder",
    quantization_config=nf4_config,
    torch_dtype=torch.bfloat16,
)
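
# Report the sizes of the quantized modules (GB) and the resulting CUDA memory usage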
print(f"transformer: {compute_module_sizes(transformer)[''] / 1024**3:.2f}")
print(f"text encoder: {compute_module_sizes(text_encoder)[''] / 1024**3:.2f}")
print_memory()
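
# Assemble the full CogVideoX pipeline around the quantized components and serialize it for later reuse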
pipe = CogVideoXPipeline.from_pretrained(
    model_id,
    text_encoder=text_encoder,
    transformer=transformer,
    torch_dtype=torch.bfloat16,
)
pipe.save_pretrained("/raid/aryan/CogVideoX-5b-nf4", max_shard_size="5GB")
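
For reference, a minimal sketch (not part of the original gist) of how the serialized NF4 checkpoint could be loaded back and used for inference. It assumes the Diffusers PR above is installed so the quantization config stored in the checkpoint is honored on reload; the prompt, generation settings, and output filename are illustrative assumptions.

import torch
from diffusers import CogVideoXPipeline
from diffusers.utils import export_to_video

# Reload the quantized pipeline from the directory written by save_pretrained above
pipe = CogVideoXPipeline.from_pretrained("/raid/aryan/CogVideoX-5b-nf4", torch_dtype=torch.bfloat16)
pipe.enable_model_cpu_offload()  # keeps peak VRAM low; quantized submodules stay managed by accelerate hooks

# Hypothetical prompt and settings; adjust as needed
video = pipe(
    prompt="A panda strumming a guitar by a campfire at dusk",
    num_inference_steps=50,
    guidance_scale=6.0,
).frames[0]
export_to_video(video, "output.mp4", fps=8)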