Just drop this script into the root folder of sd-webui, modify its contents to match what you want to run, and then run it.
If you want to use an extension, use my implementation as an example of how to modify the args for that extension.
import sys, os | |
sys.argv.append('--xformers') | |
# sys.argv.append('--opt-unet-fp8-storage') | |
# sys.argv.append('--medvram') | |
import shutil | |
from contextlib import closing | |
import numpy as np | |
import torch | |
torch.set_float32_matmul_precision('medium') | |
from fastapi import FastAPI | |
from modules import initialize_util | |
from modules import initialize | |
def create_api(app):
    """Construct the webui ``Api`` object for ``app``.

    The webui modules are imported at call time rather than at module load.

    Args:
        app: The FastAPI application passed to ``Api``.

    Returns:
        The ``Api`` instance built from ``app`` and the webui's ``queue_lock``.
    """
    from modules.api.api import Api
    from modules.call_queue import queue_lock

    return Api(app, queue_lock)
# Module-level handles that init() fills in; all three stay None until then.
t2i_script = args = api = None

# Run the webui's import and version-check steps as soon as this file loads
# (imports first, then the check - the order is kept as-is).
initialize.imports()
initialize.check_versions()
def init():
    """Initialize the webui in-process and collect the txt2img script state.

    Runs ``initialize.initialize()``, builds a FastAPI app with the webui
    middleware, creates the API object, fires the before-UI and app-started
    callbacks, initializes the txt2img scripts and builds the UI. The results
    are published through the module globals ``api``, ``t2i_script`` and
    ``args``.

    Returns:
        A ``(t2i_script, args)`` tuple: the always-on txt2img scripts and the
        default script args produced by ``api.init_default_script_args``.
    """
    global api, t2i_script, args
    from modules import script_callbacks, scripts, ui

    initialize.initialize()

    fastapi_app = FastAPI()
    initialize_util.setup_middleware(fastapi_app)
    api = create_api(fastapi_app)

    # `shared` is imported only after initialization, as in the original flow.
    from modules import shared

    script_callbacks.before_ui_callback()
    # The first argument is deliberately None; only the FastAPI app is supplied.
    script_callbacks.app_started_callback(None, fastapi_app)

    # NOTE(review): False presumably selects the txt2img (not img2img)
    # variant - confirm against the webui's ScriptRunner.
    scripts.scripts_txt2img.initialize_scripts(False)
    assert shared.opts is not None
    ui.create_ui()

    t2i_script = scripts.scripts_txt2img.alwayson_scripts
    args = api.init_default_script_args(scripts.scripts_txt2img)
    return t2i_script, args
def load_model(filename):
    """Reload the weights of ``shared.sd_model`` from a checkpoint file.

    Args:
        filename: Path of the checkpoint, wrapped in a ``CheckpointInfo``
            before being handed to ``reload_model_weights``.
    """
    from modules.sd_models import CheckpointInfo, reload_model_weights
    from modules import shared

    # Read the current model first, then build the checkpoint descriptor, so
    # the evaluation order matches a direct two-argument call.
    current_model = shared.sd_model
    checkpoint = CheckpointInfo(filename)
    reload_model_weights(current_model, checkpoint)
def enable_animatediff(
    alwayson_scripts,
    args,
    format=None,
    fps=30,
    model='',
    overlap=-1,
    context_len=16,
    video_length=0,
    video_path='',
):
    """Enable the AnimateDiff extension inside a script-args list.

    Looks up the always-on script named ``'animatediff'`` and configures the
    argument object stored at that script's ``args_from`` index. When no such
    script is present, ``args`` is returned untouched and nothing is validated.

    Args:
        alwayson_scripts: Iterable of script objects exposing ``name`` and
            ``args_from``.
        args: Indexable collection of script arguments; the AnimateDiff entry
            is modified in place.
        format: Output format(s): a single string, an iterable of strings, or
            ``None`` for the default ``{'PNG'}``. Always stored as a ``set``.
        fps: Stored on the argument object as ``fps``.
        model: Name of the AnimateDiff model (e.g. ``'mm_sd_v15_v2.ckpt'``);
            must be non-empty.
        overlap: Stored as ``overlap``.
        context_len: Stored in the argument object's ``batch_size`` field.
        video_length: Stored as ``video_length``.
        video_path: Stored as ``video_path``.

    Returns:
        The same ``args`` object that was passed in.

    Raises:
        ValueError: If ``model`` is empty.
        TypeError: If ``format`` is not ``None``, a string or an iterable.
    """
    target = next(
        (script for script in alwayson_scripts if script.name == 'animatediff'),
        None,
    )
    if target is None:
        # Extension not registered: nothing to configure.
        return args

    # Explicit raises instead of `assert`, which disappears under `python -O`.
    if not model:
        raise ValueError('enable_animatediff() requires a non-empty `model`')

    if format is None:
        formats = {'PNG'}
    elif isinstance(format, str):
        # A bare string is one format name, not an iterable of characters.
        formats = {format}
    elif hasattr(format, '__iter__'):
        formats = set(format)
    else:
        raise TypeError(
            '`format` must be None, a string or an iterable of strings, '
            f'got {type(format).__name__}'
        )

    # Thx to animatediff extension, we only have 1 arg here
    animatediff_arg = args[target.args_from]
    animatediff_arg.enable = True
    animatediff_arg.format = formats
    animatediff_arg.fps = fps
    animatediff_arg.model = model
    animatediff_arg.overlap = overlap
    animatediff_arg.batch_size = context_len
    animatediff_arg.video_length = video_length
    animatediff_arg.video_path = video_path
    # The object is mutated in place, so no write-back into `args` is needed.
    return args
def enable_controlnet(
    alwayson_scripts,
    args,
    control_mode='Balanced',
    image=None,
    model='',
    module='',
    pixel_perfect=False,
    processor_res=512,
    resize_mode='Crop and Resize',
    weight=1.0,
    extra_kwargs=None,
):
    """Append one ControlNet unit to the script-args list.

    Finds the always-on script named ``'controlnet'``, drops the units in its
    ``[args_from, args_to)`` slice whose ``enabled`` flag is false, appends a
    freshly built ``UiControlNetUnit`` and moves the arg ranges of every script
    located after the ControlNet slice so they keep pointing at their own
    arguments. When the script is not registered, ``args`` is returned
    unchanged and the extension module is never imported.

    Args:
        alwayson_scripts: Iterable of script objects exposing ``name``,
            ``args_from`` and ``args_to``.
        args: List of script arguments; modified in place.
        control_mode: Stored on the unit as ``control_mode``.
        image: Optional input image; anything ``np.array`` accepts. Stored
            together with an all-zero mask of the same shape.
        model: ControlNet model name; must be non-empty.
        module: Preprocessor module name; must be non-empty.
        pixel_perfect: Stored as ``pixel_perfect``.
        processor_res: Stored as ``processor_res``.
        resize_mode: Stored as ``resize_mode``.
        weight: Stored as ``weight``.
        extra_kwargs: Optional mapping of extra attribute names to values,
            applied last so it can override the fields above.

    Returns:
        The same ``args`` list that was passed in.

    Raises:
        ValueError: If ``model`` or ``module`` is empty.
    """
    # Explicit raise instead of `assert`, which disappears under `python -O`.
    if not (model and module):
        raise ValueError('enable_controlnet() requires both `model` and `module`')

    target = next(
        (script for script in alwayson_scripts if script.name == 'controlnet'),
        None,
    )
    if target is None:
        # Extension not registered: leave everything untouched.
        return args

    # Imported only after the lookup so that a missing extension takes the
    # early return above instead of failing on this import.
    from scripts.controlnet_ui.controlnet_ui_group import UiControlNetUnit

    unit = UiControlNetUnit()
    if image is not None:
        pixels = np.array(image)
        unit.image = {'image': pixels, 'mask': np.zeros_like(pixels)}
    unit.model = model
    unit.module = module
    unit.pixel_perfect = pixel_perfect
    unit.processor_res = processor_res
    unit.resize_mode = resize_mode
    unit.weight = weight
    unit.control_mode = control_mode
    for key, value in (extra_kwargs or {}).items():
        setattr(unit, key, value)

    old_end = target.args_to
    # Keep every enabled unit wherever it sits in the slice; popping from the
    # tail would discard enabled units that follow a disabled one.
    units = [u for u in args[target.args_from:old_end] if u.enabled]
    units.append(unit)
    args[target.args_from:old_end] = units

    target.args_to = target.args_from + len(units)
    delta = target.args_to - old_end
    if delta:
        # Shift by argument position rather than by list order, so the ranges
        # stay correct even if `alwayson_scripts` is not sorted by `args_from`.
        for script in alwayson_scripts:
            if script is target or script.args_from is None:
                continue
            if script.args_from >= old_end:
                script.args_from += delta
                script.args_to += delta
    return args
def disable_all(
    alwayson_scripts,
    args,
):
    """Turn off every ControlNet and AnimateDiff argument in ``args``.

    For each script named ``'controlnet'`` the ``enabled`` attribute of every
    argument in ``args[args_from:args_to]`` is set to ``False``; for each
    script named ``'animatediff'`` the ``enable`` attribute is. Other scripts
    are skipped without reading their arg range.

    Args:
        alwayson_scripts: Iterable of script objects exposing ``name`` and,
            for the two handled names, ``args_from`` / ``args_to``.
        args: Sliceable sequence of argument objects; mutated in place.
    """
    # The two extensions spell their on/off attribute differently.
    switch_by_name = {'controlnet': 'enabled', 'animatediff': 'enable'}

    for entry in alwayson_scripts:
        switch = switch_by_name.get(entry.name)
        if switch is None:
            continue
        for unit in args[entry.args_from:entry.args_to]:
            setattr(unit, switch, False)
def txt2img(
    prompt: str = 'scenery, forest, night sky, starry sky, japanese building',
    negative_prompt: str = 'EasyNegativeV2, text, artists name, icon, logo, blush, looking at viewer, dragon',
    steps: int = 30,
    sampler_name: str = 'DPM++ 2M SDE Heun Exponential',
    n_iter: int = 1,
    batch_size: int = 3,
    cfg_scale: float = 5,
    height: int = 960,
    width: int = 576,
    enable_hr=False,
    hr_steps: int = 10,
    hr_height: int = 1600,
    hr_width: int = 960,
    t2i_script=None,
    args: tuple = tuple(),
):
    """Run one txt2img job through the webui processing pipeline.

    Builds a ``StableDiffusionProcessingTxt2Img`` from the arguments, attaches
    the txt2img script runner and ``args`` to it, lets the runner handle the
    job and falls back to ``processing.process_images`` when the runner
    returns ``None``. Nothing is written to disk by the job itself
    (``do_not_save_grid`` and ``do_not_save_samples`` are both set).

    Args:
        prompt: Positive prompt.
        negative_prompt: Negative prompt.
        steps: Sampling steps of the first pass.
        sampler_name: Sampler name handed to the processing object.
        n_iter: Passed through as ``n_iter``.
        batch_size: Passed through as ``batch_size``.
        cfg_scale: Passed through as ``cfg_scale``.
        height: First-pass height (also used as ``firstphase_height``).
        width: First-pass width (also used as ``firstphase_width``).
        enable_hr: Passed through as ``enable_hr``.
        hr_steps: Passed as ``hr_second_pass_steps``.
        hr_height: Passed as ``hr_resize_y``.
        hr_width: Passed as ``hr_resize_x``.
        t2i_script: Always-on scripts to install on the shared txt2img runner.
            ``None`` keeps whatever the runner already has.
        args: Script arguments, stored as ``p.script_args`` and also unpacked
            into the runner's ``run`` call.

    Returns:
        The ``images`` attribute of the processing result.
    """
    from modules import processing, shared, scripts

    p = processing.StableDiffusionProcessingTxt2Img(
        sd_model=shared.sd_model,
        prompt=prompt,
        negative_prompt=negative_prompt,
        sampler_name=sampler_name,
        batch_size=batch_size,
        n_iter=n_iter,
        steps=steps,
        cfg_scale=cfg_scale,
        width=width,
        height=height,
        firstphase_width=width,
        firstphase_height=height,
        enable_hr=enable_hr,
        hr_resize_x=hr_width,
        hr_resize_y=hr_height,
        hr_upscaler='Lanczos',
        hr_second_pass_steps=hr_steps,
        denoising_strength=0.6,
        do_not_save_grid=True,
        do_not_save_samples=True,
    )

    runner = scripts.scripts_txt2img
    # Only override the shared runner's script list when one was supplied;
    # assigning the default None would wipe the list for every later job.
    if t2i_script is not None:
        runner.alwayson_scripts = t2i_script
    p.scripts = runner
    p.script_args = args

    # closing() guarantees p.close() runs even when generation raises.
    with closing(p):
        processed = runner.run(p, *args)
        if processed is None:
            processed = processing.process_images(p)
    return processed.images
# Used by process() when no config is supplied. Treated as read-only.
_DEFAULT_PROCESS_CONFIG = {
    'prompt': '1girl',
    'resolution': [(768, 768), None],
    'steps': [20, None],
    'cnet': [
        {
            'model': 'control_v11p_sd15_canny',
            'module': 'canny',
            'weight': 0.8,
            'extra_kwargs': {
                'threshold_a': 50,
                'threshold_b': 150,
            },
        },
        {
            'model': 'control_v11f1p_sd15_depth',
            'module': 'depth_midas',
            'weight': 0.7,
        },
    ],
    'context_len': 16,
}


def _natural_sort_key(name):
    """Return a sort key that orders digit runs numerically ('f2' < 'f10')."""
    import re

    # A capturing split alternates text / digits / text ..., so odd positions
    # are always the digit runs and types never mix at the same position.
    parts = re.split(r'([0-9]+)', name)
    key = [int(part) if index % 2 else part.lower()
           for index, part in enumerate(parts)]
    # The raw name breaks ties between names that differ only in case.
    return key, name


@torch.no_grad()
def process(
    video_path='',
    config=None,
):
    """Run AnimateDiff + ControlNet txt2img over a frame directory.

    Enables AnimateDiff (with ``video_path`` as its video source) and one
    ControlNet unit per entry of ``config['cnet']`` in the module-level
    ``args``, runs a single ``txt2img`` job, disables both extensions again
    and saves the resulting images as PNG files in a ``result`` directory
    next to ``video_path``. Output files are named after the entries of
    ``video_path``, paired with the images in natural sort order.

    Args:
        video_path: Directory containing the input frames.
        config: Job description with the keys ``'prompt'``, ``'resolution'``
            (``[(w, h), (hires_w, hires_h) or None]``), ``'steps'``
            (``[base, hires or None]``; a falsy hires value falls back to the
            base value), ``'cnet'`` (list of keyword dicts for
            ``enable_controlnet``) and ``'context_len'``. ``None`` selects the
            built-in default configuration.
    """
    global t2i_script, args
    if config is None:
        config = _DEFAULT_PROCESS_CONFIG

    root_path = os.path.dirname(video_path.rstrip('/'))
    result_path = os.path.join(root_path, 'result')

    args = enable_animatediff(
        alwayson_scripts=t2i_script,
        args=args,
        format='PNG',
        model='mm_sd_v15_v2.ckpt',
        fps=30,
        context_len=config['context_len'],
        video_path=video_path,
    )
    for cnet in config['cnet']:
        args = enable_controlnet(
            alwayson_scripts=t2i_script,
            args=args,
            **cnet
        )

    resolution = config['resolution']
    w, h = resolution[0][0], resolution[0][1]
    hires = resolution[1]
    if hires is None:
        enable_hr = False
        hr_steps = hw = hh = 0
    else:
        enable_hr = True
        hw, hh = hires[0], hires[1]
        hr_steps = config['steps'][1] or config['steps'][0]

    from modules import devices

    try:
        with devices.autocast():
            test_imgs = txt2img(
                prompt=config['prompt'],
                negative_prompt='EasyNegativeV2',
                steps=config['steps'][0],
                sampler_name='DPM++ 2M SDE Exponential',
                n_iter=1,
                batch_size=1,
                cfg_scale=7,
                height=h,
                width=w,
                enable_hr=enable_hr,
                hr_steps=hr_steps,
                hr_height=hh,
                hr_width=hw,
                t2i_script=t2i_script,
                args=args,
            )
    finally:
        # Runs on failure too, so a crashed job does not leave the extensions
        # enabled in the shared args for whatever runs next.
        devices.torch_gc()
        disable_all(t2i_script, args)

    # save() does not create missing directories.
    os.makedirs(result_path, exist_ok=True)

    # os.listdir() returns entries in arbitrary order; sort them so names are
    # paired with images deterministically.
    # NOTE(review): this is meant to approximate the natural ordering sd-webui
    # uses when listing a directory - confirm it matches how the extension
    # orders the input frames.
    frames = sorted(os.listdir(video_path), key=_natural_sort_key)

    # zip() stops at the shorter sequence, so images beyond the number of
    # input entries (e.g. ControlNet detect maps) are not saved.
    for frame_name, new_frame in zip(frames, test_imgs):
        stem = os.path.splitext(os.path.basename(frame_name))[0]
        new_frame_path = os.path.join(result_path, stem)
        new_frame.save(f'{new_frame_path}.png')
if __name__ == '__main__':
    import time

    # init() already publishes both values through the module globals; binding
    # the second one to `args` (not a stray `arg`) keeps the names consistent.
    t2i_script, args = init()
    load_model('./models/Stable-diffusion/KBlueLeaf/kohaku-v4-rev1.2.safetensors')

    # perf_counter() is monotonic, so the elapsed time cannot be skewed by
    # wall-clock adjustments while the job runs.
    started = time.perf_counter()
    process(r'VIDEO_PATH')
    print(time.perf_counter() - started)