# List the Python interpreters currently installed under /usr/bin
ls /usr/bin/python*
# Remove the python3.5 package
sudo apt-get remove python3.5
# Remove python3.5 together with its now-unneeded dependencies
sudo apt-get remove --auto-remove python3.5
# Purge python3.5, deleting its configuration files as well
sudo apt-get purge python3.5
# train_grpo.py
#
# See https://github.com/willccbb/verifiers for ongoing developments
#
"""
citation:
@misc{brown2025grpodemo,
    title={Granular Format Rewards for Eliciting Mathematical Reasoning Capabilities in Small Language Models},
    author={Brown, William},
#!/bin/bash
# This script initializes a GPU machine to start a vLLM server.
# Ensure the script is run as root.
if [[ $EUID -ne 0 ]]; then
    echo "This script must be run as root"
    exit 1
fi
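# Hedged example (not part of the script above): once the server is running it
# should expose vLLM's OpenAI-compatible HTTP API. A minimal Python sketch for
# querying it; the port, route, and model name are assumptions, not taken from
# this script.
import requests

VLLM_URL = "http://localhost:8000/v1/completions"  # assumed default port and route

payload = {
    "model": "my-served-model",   # hypothetical served model name
    "prompt": "Hello, world.",
    "max_tokens": 16,
}
response = requests.post(VLLM_URL, json=payload, timeout=30)
response.raise_for_status()
print(response.json()["choices"][0]["text"])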
import torch
import os
import argparse
import matplotlib.pyplot as plt
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer
import seaborn as sns


def get_parser():
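    # The original snippet cuts off here. A minimal hedged sketch of what
    # get_parser() might return, assuming the script loads a Hugging Face model
    # and writes plots; the argument names below are illustrative assumptions,
    # not the original ones.
    parser = argparse.ArgumentParser(description="Analyze a causal LM and plot results")
    parser.add_argument("--model-name", type=str, default="gpt2",
                        help="Hugging Face model id to load")
    parser.add_argument("--output-dir", type=str, default="plots",
                        help="Directory for saved figures")
    parser.add_argument("--device", type=str,
                        default="cuda" if torch.cuda.is_available() else "cpu")
    return parser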
# Taken from https://johanwind.github.io/2023/03/23/rwkv_details.html.
# I've added additional comments and restructured it a tiny bit, which makes it clearer for me.
import numpy as np
from torch import load as torch_load  # Only for loading the model weights
from tokenizers import Tokenizer

exp = np.exp
layer_norm = lambda x, w, b: (x - np.mean(x)) / np.std(x) * w + b
sigmoid = lambda x: 1 / (1 + exp(-x))
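# As a hedged illustration of how these helpers get used (adapted from the blog
# post linked above; names follow that write-up and may differ slightly from the
# rest of this file), RWKV's channel mixing interpolates the current and previous
# token inputs, applies a squared ReLU, and gates the result with sigmoid(r):
def channel_mixing(x, last_x, mix_k, mix_r, Wk, Wr, Wv):
    # Mix the current input with the previous token's input
    k = Wk @ (x * mix_k + last_x * (1 - mix_k))
    r = Wr @ (x * mix_r + last_x * (1 - mix_r))
    # Squared-ReLU feed-forward, gated by the sigmoid "receptance"
    vk = Wv @ np.maximum(k, 0) ** 2
    return sigmoid(r) * vk, x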
""" | |
Download the latest wiki dump files for a language, | |
If from_date is passed, the latest before that date is downloaded | |
""" | |
import requests | |
from datetime import datetime | |
from datetime import timedelta | |
from string import Template | |
from tqdm import tqdm |
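# A hedged sketch of how the latest dump might be resolved; the URL template,
# date format, and helper name are assumptions, not taken from this module.
# Wikimedia dumps are published under https://dumps.wikimedia.org/<lang>wiki/<YYYYMMDD>/.
DUMP_URL = Template("https://dumps.wikimedia.org/${lang}wiki/${date}/")

def find_latest_dump(lang, from_date=None, max_lookback_days=90):
    """Walk backwards day by day until an existing dump directory is found."""
    day = from_date or datetime.utcnow()
    for _ in range(max_lookback_days):
        url = DUMP_URL.substitute(lang=lang, date=day.strftime("%Y%m%d"))
        if requests.head(url, timeout=10).status_code == 200:
            return url
        day -= timedelta(days=1)
    return None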
import os
import json
from tqdm import tqdm
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModel
from elasticsearch import Elasticsearch


class JsonlCollectionIterator:
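    # The original class body is cut off here. A minimal hedged sketch of what
    # such an iterator often looks like: stream JSONL records in fixed-size
    # batches so they can be embedded and indexed into Elasticsearch. The
    # constructor signature and field handling are assumptions.
    def __init__(self, collection_path, batch_size=64):
        self.collection_path = collection_path
        self.batch_size = batch_size

    def __iter__(self):
        batch = []
        with open(self.collection_path, "r", encoding="utf-8") as f:
            for line in tqdm(f, desc="reading collection"):
                batch.append(json.loads(line))
                if len(batch) == self.batch_size:
                    yield batch
                    batch = []
        if batch:
            yield batch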
mod ml_thread;

use gdnative::prelude::{godot_print, methods, Method, NativeClass, Node as GDNode, InitHandle, godot_init};
use ml_thread::start_language_model_thread;
use std::sync::mpsc::{channel, Receiver, RecvError, Sender, SendError};

const MAX_INPUT_LENGTH: usize = 512;
const BATCH_SIZE: usize = 1;
import os

# ==== Non-communicating processes
# 4x 1 chip per process:
os.environ["TPU_CHIPS_PER_PROCESS_BOUNDS"] = "1,1,1"
os.environ["TPU_PROCESS_BOUNDS"] = "1,1,1"
# Different per process:
os.environ["TPU_VISIBLE_DEVICES"] = "0"  # "1", "2", "3"
# 1-liner for bash:
# TPU_CHIPS_PER_PROCESS_BOUNDS=1,1,1 TPU_PROCESS_BOUNDS=1,1,1 TPU_VISIBLE_DEVICES=0 TPU_MESH_CONTROLLER_ADDRESS=localhost:8476 TPU_MESH_CONTROLLER_PORT=8476
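# A hedged sketch of the per-process setup this config implies: each of the 4
# processes sets the env vars above (with its own TPU_VISIBLE_DEVICES) before
# importing jax, and should then see exactly one local device. The helper name
# and process_index argument are assumptions; the mesh controller variables
# mirror the bash one-liner above.
def configure_single_chip_process(process_index):
    os.environ["TPU_CHIPS_PER_PROCESS_BOUNDS"] = "1,1,1"
    os.environ["TPU_PROCESS_BOUNDS"] = "1,1,1"
    os.environ["TPU_VISIBLE_DEVICES"] = str(process_index)  # one of "0".."3"
    os.environ["TPU_MESH_CONTROLLER_ADDRESS"] = "localhost:8476"
    os.environ["TPU_MESH_CONTROLLER_PORT"] = "8476"

    import jax  # must be imported after the env vars are set
    assert jax.device_count() == 1, "each process should own a single TPU chip"
    return jax.devices()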