Skip to content

Instantly share code, notes, and snippets.

View RealShocky's full-sized avatar

Mark Varkevisser RealShocky

View GitHub Profile
@willccbb
willccbb / grpo_demo.py
Last active April 28, 2025 01:48
GRPO Llama-1B
# train_grpo.py
#
# See https://github.com/willccbb/verifiers for ongoing developments
#
import re
import torch
from datasets import load_dataset, Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import LoraConfig
from trl import GRPOConfig, GRPOTrainer
# /// script
# requires-python = ">=3.11,<3.12"
# dependencies = [
# "distilabel[hf-transformers, hf-inference-endpoints]",
# ]
# ///
from distilabel.models import InferenceEndpointsLLM
from distilabel.pipeline import InstructionResponsePipeline
repo_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
@gidim
gidim / gist:18e1685f6a47b235e393e57bad89d454
Last active September 1, 2024 22:22
Legal Clause for Products Using LLMs
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document.