@jason-kane
Created September 19, 2025 06:16
Example of flux-schnell on RTX 3060 12GB
#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.12"
# dependencies = [
# "torch>=2.8.0",
# "diffusers@git+https://github.com/huggingface/diffusers.git",
# "transformers==4.55.4",
# "accelerate>=0.26.0",
# "bitsandbytes==0.45.5",
# "protobuf==5.29.4",
# "sentencepiece",
# ]
# ///
"""
12GB VRAM FLUX.1-schnell example
After the first run it takes 38-41 seconds on a 12GB 3060 GTX
to generate a 1024x1024 image
If you have UV installed, you can just run this file and it will
self-install.
"""
import gc
import logging
import random
import torch
from diffusers import DiffusionPipeline
from transformers import (
    BitsAndBytesConfig,
    T5EncoderModel,
)

logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)


def disabled_safety_checker(images, clip_input):
    """Replacement safety checker that passes every image through unchanged."""
    if len(images.shape) == 4:
        num_images = images.shape[0]
        return images, [False] * num_images
    return images, False


def local_flux_schnell(
    clip_prompt,     # short prompt
    t5_prompt=None,  # longer, more detailed prompt
):
    # Quantize the T5 text encoder to 8-bit so everything fits in 12 GB of VRAM.
    quantization_config = BitsAndBytesConfig(load_in_8bit=True)

    if t5_prompt is None:
        t5_prompt = clip_prompt

    # schnell needs only 4 inference steps; as the name implies, it is
    # faster than the dev version.
    model_id = "black-forest-labs/FLUX.1-schnell"

    text_encoder = T5EncoderModel.from_pretrained(
        model_id,
        subfolder="text_encoder_2",
        quantization_config=quantization_config,
        torch_dtype=torch.bfloat16,  # bfloat16 and float16 both work; bfloat16 gives a warning but works
    )

    pipe = DiffusionPipeline.from_pretrained(
        model_id,
        torch_dtype=torch.bfloat16,  # bfloat16 and float16 both work; must match the T5 encoder
        text_encoder_2=text_encoder,
        device_map="balanced",
        max_memory={0: "11GiB", "cpu": "48GiB"},
    )
    pipe.safety_checker = disabled_safety_checker
    pipe.vae.enable_tiling()  # less memory usage at VAE time

    log.info(f'Using {clip_prompt=} and {t5_prompt=} to generate a new image...')
    image = pipe(
        clip_prompt,
        prompt_2=t5_prompt,
        num_images_per_prompt=1,
        guidance_scale=0.0,       # must be 0.0 for schnell; dev takes a normal guidance scale, as in SD
        num_inference_steps=4,    # 4 is enough for schnell; the dev version needs around 50
        max_sequence_length=256,  # limit for the T5 encoder (text_encoder_2); 256 max for schnell
        generator=torch.Generator("cpu").manual_seed(random.randrange(4294967294)),
    ).images[0]

    # Free the pipeline and encoder so VRAM is returned before we exit.
    del pipe
    del text_encoder
    gc.collect()
    torch.cuda.empty_cache()
    torch.cuda.ipc_collect()

    return image


if __name__ == "__main__":
    prompt = "A beautiful landscape, trending on artstation"
    image = local_flux_schnell(prompt)
    image.save("flux_schnell_example.png")
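Since the function already accepts separate prompts for the CLIP and T5 encoders, you can pass a short prompt plus a longer descriptive one. A minimal sketch (the prompts, import name, and output filename here are made up):

from flux_schnell import local_flux_schnell  # assumes the script above is saved as flux_schnell.py

image = local_flux_schnell(
    "a red fox in the snow",
    t5_prompt=(
        "a photorealistic red fox standing in fresh snow at golden hour, "
        "soft backlight, shallow depth of field"
    ),
)
image.save("flux_schnell_dual_prompt.png")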