@a-r-r-o-w
Created September 15, 2024 19:44
Conversion of CogVideoX to 4-bit (NF4) bitsandbytes weights. Based on [this](https://github.com/huggingface/diffusers/pull/9213) Diffusers PR.
import gc
import torch
from accelerate.utils import compute_module_sizes
from diffusers import BitsAndBytesConfig, CogVideoXPipeline, CogVideoXTransformer3DModel
from diffusers.utils import export_to_video
from transformers import T5EncoderModel
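
# Helpers to reset CUDA memory statistics and report current/peak usage (in GB)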
def reset_memory():
    gc.collect()
    torch.cuda.empty_cache()
    torch.cuda.reset_peak_memory_stats()
    torch.cuda.reset_accumulated_memory_stats()


def print_memory():
    memory = torch.cuda.memory_allocated() / 1024**3
    max_memory = torch.cuda.max_memory_allocated() / 1024**3
    max_reserved = torch.cuda.max_memory_reserved() / 1024**3
    print(f"{memory=:.2f}")
    print(f"{max_memory=:.2f}")
    print(f"{max_reserved=:.2f}")


reset_memory()

model_id = "THUDM/CogVideoX-5b"
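
# 4-bit NF4 quantization config shared by the transformer and the T5 text encoder; compute runs in bfloat16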
nf4_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)
transformer = CogVideoXTransformer3DModel.from_pretrained(
    model_id,
    subfolder="transformer",
    quantization_config=nf4_config,
    torch_dtype=torch.bfloat16,
)

text_encoder = T5EncoderModel.from_pretrained(
    model_id,
    subfolder="text_encoder",
    quantization_config=nf4_config,
    torch_dtype=torch.bfloat16,
)
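
# Report the sizes of the quantized modules (GB) and the resulting CUDA memory usage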
print(f"transformer: {compute_module_sizes(transformer)[''] / 1024**3:.2f}")
print(f"text encoder: {compute_module_sizes(text_encoder)[''] / 1024**3:.2f}")
print_memory()
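
# Assemble the full CogVideoX pipeline around the quantized components and serialize it for later reuse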
pipe = CogVideoXPipeline.from_pretrained(
    model_id,
    text_encoder=text_encoder,
    transformer=transformer,
    torch_dtype=torch.bfloat16,
)
pipe.save_pretrained("/raid/aryan/CogVideoX-5b-nf4", max_shard_size="5GB")
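
For reference, a minimal sketch (not part of the original gist) of how the serialized NF4 checkpoint could be loaded back and used for inference. It assumes the Diffusers PR above is installed so the quantization config stored in the checkpoint is honored on reload; the prompt, generation settings, and output filename are illustrative assumptions.

import torch
from diffusers import CogVideoXPipeline
from diffusers.utils import export_to_video

# Reload the quantized pipeline from the directory written by save_pretrained above
pipe = CogVideoXPipeline.from_pretrained("/raid/aryan/CogVideoX-5b-nf4", torch_dtype=torch.bfloat16)
pipe.enable_model_cpu_offload()  # keeps peak VRAM low; quantized submodules stay managed by accelerate hooks

# Hypothetical prompt and settings; adjust as needed
video = pipe(
    prompt="A panda strumming a guitar by a campfire at dusk",
    num_inference_steps=50,
    guidance_scale=6.0,
).frames[0]
export_to_video(video, "output.mp4", fps=8)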