Skip to content

Instantly share code, notes, and snippets.

@googlefan256
Created July 2, 2025 02:45
Show Gist options
  • Save googlefan256/7e31938e1b8709ffd56d1eb90e07b475 to your computer and use it in GitHub Desktop.
Save googlefan256/7e31938e1b8709ffd56d1eb90e07b475 to your computer and use it in GitHub Desktop.
DiffuCoder 7Bと会話
from transformers import AutoModel, AutoTokenizer
import torch
# Enable cuDNN autotuning: picks the fastest convolution/attention kernels
# for fixed input shapes at the cost of a warm-up benchmark pass.
torch.backends.cudnn.benchmark = True
device = "cuda:0"
# Load the DiffuCoder tokenizer and model. trust_remote_code is required
# because the diffusion-generation logic ships as custom code in the repo.
# NOTE(review): device_map is not a standard tokenizer kwarg — it is most
# likely ignored here; verify it can be dropped from the tokenizer call.
tokenizer = AutoTokenizer.from_pretrained(
    "apple/DiffuCoder-7B-cpGRPO", trust_remote_code=True, device_map=device
)
model = AutoModel.from_pretrained(
    "apple/DiffuCoder-7B-cpGRPO", trust_remote_code=True, device_map=device
)
@torch.inference_mode()
def generate(chat, steps=30, temperature=0.0, max_new_tokens=100):
    """Run diffusion-based generation over a chat history; return the reply text.

    Parameters
    ----------
    chat : list of ``{"role": ..., "content": ...}`` message dicts.
    steps : number of diffusion denoising steps.
    temperature : sampling temperature; ``0.0`` means greedy decoding.
    max_new_tokens : cap on the number of generated tokens.
    """
    # Render the conversation through the model's chat template, leaving the
    # generation prompt open for the assistant turn.
    prompt = tokenizer.apply_chat_template(
        chat, tokenize=False, add_generation_prompt=True
    )
    batch = tokenizer(
        text=prompt,
        return_tensors="pt",
        padding=True,
        padding_side="left",
        add_special_tokens=False,
    ).to(device)
    prompt_len = batch.input_ids.size(-1)
    result = model.diffusion_generate(
        inputs=batch.input_ids,
        attention_mask=batch.attention_mask,
        max_new_tokens=max_new_tokens,
        output_history=False,
        return_dict_in_generate=True,
        steps=steps,
        temperature=temperature,
        # Nucleus sampling only applies when temperature > 0 (greedy otherwise).
        top_p=0.95 if temperature > 0 else 1.0,
        alg="entropy",
        alg_temp=0.0,
        mask_token_id=tokenizer.mask_token_id,
    )
    # Decode only the newly generated suffix, skipping the prompt tokens.
    return tokenizer.decode(
        result.sequences[0, prompt_len:].cpu().numpy(),
        skip_special_tokens=True,
    )
# Interactive REPL: type a message to chat, "clear" to reset the history,
# "exit" to quit.
chat = []
while True:
    user_input = input("User: ").strip()
    if user_input == "clear":
        chat.clear()
        continue
    elif user_input == "exit":
        break
    chat.append({"role": "user", "content": user_input})
    reply = generate(chat, steps=64, max_new_tokens=256)
    print("Assistant: " + reply)
    # BUG FIX: the model's reply must be stored with the "assistant" role
    # (the original appended it as "user"), otherwise the chat template
    # mislabels the history and degrades every subsequent turn.
    chat.append({"role": "assistant", "content": reply})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment