Mark Varkevisser RealShocky

willccbb / grpo_demo.py

Last active April 28, 2025 01:48

GRPO Llama-1B

	# train_grpo.py
	#
	# See https://github.com/willccbb/verifiers for ongoing developments
	#
	import re
	import torch
	from datasets import load_dataset, Dataset
	from transformers import AutoTokenizer, AutoModelForCausalLM
	from peft import LoraConfig
	from trl import GRPOConfig, GRPOTrainer

davidberenstein1957 / synthetic_data_deepseekr1_qwen_distill.py

Last active February 28, 2025 13:10

	# /// script
	# requires-python = ">=3.11,<3.12"
	# dependencies = [
	# "distilabel[hf-transformers, hf-inference-endpoints]",
	# ]
	# ///
	from distilabel.models import InferenceEndpointsLLM
	from distilabel.pipeline import InstructionResponsePipeline

	repo_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"

gidim / gist:18e1685f6a47b235e393e57bad89d454

Last active September 1, 2024 22:22

Legal Clause for Products Using LLMs

	Apache License
	Version 2.0, January 2004
	http://www.apache.org/licenses/

	TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

	1. Definitions.

	"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document.