Install packages:
pip install open-webui mlx-lm
Next, start the Open WebUI server.
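The pip package installs a CLI entry point for this; a minimal invocation (the default settings, which typically serve the UI on localhost port 8080, are an assumption of this sketch) is:

open-webui serve

The web UI should then be reachable locally in a browser.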
import argparse
import math
from pathlib import Path

import mlx.core as mx
import mlx.nn as nn
from tqdm import tqdm
from mlx_lm.utils import load

def eval_ppl(model, data, batch_size=32):
    # Sketch of batched perplexity: mean next-token cross-entropy over `data`, exponentiated
    all_loss, num_tokens = 0.0, 0
    for s in tqdm(range(0, len(data), batch_size)):
        batch = data[s : s + batch_size]
        losses = nn.losses.cross_entropy(model(batch[:, :-1]), batch[:, 1:])
        all_loss += losses.sum().item()
        num_tokens += losses.size
    return math.exp(all_loss / num_tokens)
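One way to drive this helper, shown purely as a sketch (the model repo, the text file, and the 512-token packing are illustrative assumptions, not part of the original script):

model, tokenizer = load("mlx-community/Mistral-7B-Instruct-v0.3-4bit")  # hypothetical model
tokens = mx.array(tokenizer.encode(Path("valid.txt").read_text()))  # hypothetical data file
data = tokens[: (tokens.size // 512) * 512].reshape(-1, 512)  # pack into 512-token rows
print(f"Perplexity: {eval_ppl(model, data):.3f}")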
import MLX
import MLXNN

// Gated Linear Unit: split the input in two along `dim` and gate one half with the other
class GLU: Module, UnaryLayer {
    let dim: Int

    init(dim: Int) {
        self.dim = dim
    }

    func callAsFunction(_ x: MLXArray) -> MLXArray {
        let halves = x.split(parts: 2, axis: dim)
        return halves[0] * MLXNN.sigmoid(halves[1])
    }
}
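A quick usage sketch for the layer above (the shape and values are illustrative assumptions):

let glu = GLU(dim: -1)
let x = MLX.ones([2, 8])  // the last axis is split into two halves of width 4
let y = glu(x)            // result has shape [2, 4]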
On every machine in the cluster install openmpi and mlx-lm:
conda install conda-forge::openmpi
pip install -U mlx-lm
Next, download the pipeline parallel run script to the same path on every machine:
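For example, fetching it into the home directory on each machine (the URL below is an assumption; use wherever pipeline_generate.py is hosted in the mlx-lm repository):

curl -L -o ~/pipeline_generate.py https://raw.githubusercontent.com/ml-explore/mlx-lm/main/mlx_lm/examples/pipeline_generate.py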
cmake_minimum_required(VERSION 3.27)
project(example LANGUAGES CXX)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

find_package(
  Python 3.9
  COMPONENTS Interpreter Development.Module
  REQUIRED)
import argparse
from functools import partial
import multiprocessing as mp
from typing import Callable, Optional

import mlx.core as mx
import mlx.nn as nn
from mlx.utils import tree_map_with_path
from mlx_lm.utils import *
This guide is adapted from an original post by Christopher Charles.
import time

import mlx.core as mx
import mlx.nn as nn
from huggingface_hub import snapshot_download


class Block(nn.Module):
    # Residual-style block; the two convolutions below are a sketch of one common layout
    def __init__(self, in_dims, dims, stride=1):
        super().__init__()
        self.conv1 = nn.Conv2d(in_dims, dims, kernel_size=3, stride=stride, padding=1)
        self.conv2 = nn.Conv2d(dims, dims, kernel_size=3, stride=1, padding=1)