Example of FLUX.1-schnell on an RTX 3060 12GB
#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.12"
# dependencies = [
#     "torch>=2.8.0",
#     "diffusers@git+https://github.com/huggingface/diffusers.git",
#     "transformers==4.55.4",
#     "accelerate>=0.26.0",
#     "bitsandbytes==0.45.5",
#     "protobuf==5.29.4",
#     "sentencepiece",
# ]
# ///
"""
12GB VRAM FLUX.1-schnell example.

After the first run it takes 38-41 seconds on a 12GB RTX 3060
to generate a 1024x1024 image.

If you have uv installed, you can just run this file and it will
install its own dependencies.
"""
import gc
import logging
import random

import torch
from diffusers import DiffusionPipeline
from transformers import (
    BitsAndBytesConfig,
    T5EncoderModel,
)

logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)


def disabled_safety_checker(images, clip_input):
    if len(images.shape) == 4:
        num_images = images.shape[0]
        return images, [False] * num_images
    else:
        return images, False


def local_flux_schnell(
    clip_prompt,     # short prompt
    t5_prompt=None,  # longer, more detailed prompt
):
    quantization_config = BitsAndBytesConfig(load_in_8bit=True)

    if t5_prompt is None:
        t5_prompt = clip_prompt

    model_id = "black-forest-labs/FLUX.1-schnell"  # needs only 4 steps - faster than the dev version, as the name implies

    text_encoder = T5EncoderModel.from_pretrained(
        model_id,
        subfolder="text_encoder_2",
        quantization_config=quantization_config,
        torch_dtype=torch.bfloat16,  # bfloat16 and float16 both work - the former gives a warning but seems fine
    )

    pipe = DiffusionPipeline.from_pretrained(
        model_id,
        torch_dtype=torch.bfloat16,  # bfloat16 and float16 both work, must match the T5 encoder
        text_encoder_2=text_encoder,
        device_map="balanced",
        max_memory={0: "11GiB", "cpu": "48GiB"},
    )
    pipe.safety_checker = disabled_safety_checker
    pipe.vae.enable_tiling()  # less memory usage at VAE decode time

    log.info(f'Using {clip_prompt=} and {t5_prompt=} to generate a new image...')

    image = pipe(
        clip_prompt,
        prompt_2=t5_prompt,
        num_images_per_prompt=1,
        guidance_scale=0.0,        # must be 0.0 for the schnell version; dev takes a normal guidance scale
        num_inference_steps=4,     # schnell only needs 4; the dev version needs 50 or so
        max_sequence_length=256,   # relates to the T5 encoder (text_encoder_2) - max 256 for schnell
        generator=torch.Generator("cpu").manual_seed(random.randrange(4294967294)),
    ).images[0]

    # free VRAM so repeated calls don't accumulate memory
    del pipe
    del text_encoder
    gc.collect()
    torch.cuda.empty_cache()
    torch.cuda.ipc_collect()

    return image


if __name__ == "__main__":
    prompt = "A beautiful landscape, trending on artstation"
    image = local_flux_schnell(prompt)
    image.save("flux_schnell_example.png")
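Because of the PEP 723 header and the shebang, the script can be run directly (./flux_schnell.py or uv run flux_schnell.py) and uv will resolve the dependencies on first launch. The function can also be imported and called with a separate, longer T5 prompt. A minimal sketch, assuming the script above is saved as flux_schnell.py and its dependencies are available in the current environment (the filename, prompts, and output name are illustrative, not part of the original gist):

# assumes the script above was saved as flux_schnell.py in the same directory
from flux_schnell import local_flux_schnell

image = local_flux_schnell(
    "a lighthouse at dusk",  # short prompt, fed to the CLIP encoder
    t5_prompt=(
        "a weathered stone lighthouse on a rocky coast at dusk, "
        "long exposure, soft fog, warm lamplight"
    ),  # longer, more detailed prompt, fed to the T5 encoder
)
image.save("lighthouse.png")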