Skip to content

Instantly share code, notes, and snippets.

@cmdr2
cmdr2 / overflow_clipping_test.py
Created July 24, 2025 09:34
Uses a fragment shader to discard pixels outside the clipping volume (specified by a transform matrix and a local size)
import bpy
import gpu
from gpu_extras.batch import batch_for_shader
VERT_SHADER = """
uniform mat4 ModelViewProjectionMatrix;
uniform mat4 ModelMatrix;
in vec2 texCoord;
in vec3 position;
import bpy
import gpu
from gpu_extras.batch import batch_for_shader
VERT_SHADER = """
uniform mat4 ModelViewProjectionMatrix;
in vec2 texCoord;
in vec3 position;
out vec3 pos;
// can run on https://thebookofshaders.com/edit.php
#ifdef GL_ES
precision mediump float;
#endif
uniform vec2 u_resolution;
vec3 hsv2rgb(vec3 c) {
vec4 K = vec4(1.0, 2.0/3.0, 1.0/3.0, 3.0);
#include <cuda_runtime.h>
#include <iostream>
#include <chrono>
// Abort the program with a diagnostic if a CUDA runtime call fails.
//
// `call` is evaluated exactly once and its result compared to cudaSuccess.
// On failure the source location and the runtime's description of the error
// are written to stderr and the process exits with status 1.
//
// The do { ... } while (0) wrapper makes the expansion a single statement, so
// `if (cond) CHECK_CUDA(x); else ...` parses as written instead of the `else`
// binding to the macro's internal `if`. Invoke it with a trailing semicolon.
#define CHECK_CUDA(call) \
    do { \
        cudaError_t check_cuda_err_ = (call); \
        if (check_cuda_err_ != cudaSuccess) { \
            std::cerr << "CUDA error at " << __FILE__ << ":" << __LINE__ << ": " \
                      << cudaGetErrorString(check_cuda_err_) << std::endl; \
            std::exit(1); \
        } \
    } while (0)
#include <cuda_runtime.h>
#include <iostream>
#include <chrono>
#define CHECK_CUDA(call) \
do { \
cudaError_t err = call; \
if (err != cudaSuccess) { \
std::cerr << "CUDA Error: " << cudaGetErrorString(err) << "\n"; \
exit(EXIT_FAILURE); \
#include <cuda_runtime.h>
#include <iostream>
#include <chrono>
#define CHECK_CUDA(call) \
do { \
cudaError_t err = call; \
if (err != cudaSuccess) { \
std::cerr << "CUDA Error: " << cudaGetErrorString(err) << "\n"; \
exit(EXIT_FAILURE); \
12,13c12,15
< #include "ggml-cpu/unary-ops.h"
< #include "ggml-cpu/binary-ops.h"
---
> #include "unary-ops.h"
> #include "binary-ops.h"
> #include "vec.h"
> #include "ops.h"
86,109d87
< #if defined(GGML_USE_ACCELERATE)
@cmdr2
cmdr2 / ops.cpp.patch
Last active March 29, 2025 13:07
ops.cpp.patch
diff --git a/ops.cpp b/ops.cpp
index 6190d0d..c44157b 100644
--- a/ops.cpp
+++ b/ops.cpp
@@ -2347,7 +2347,7 @@ static void ggml_compute_forward_repeat_back_f32(
GGML_ASSERT(nb00 == sizeof(float));
if (ggml_is_contiguous(dst)) {
- ggml_vec_set_f32(ne0*ne1*ne2*ne3, dst->data, 0);
+ ggml_vec_set_f32(ne0*ne1*ne2*ne3, (float *)dst->data, 0);
11a12,13
> #include "ggml-cpu/unary-ops.h"
> #include "ggml-cpu/binary-ops.h"
4292,4625d4293
< static void ggml_compute_forward_add_f32(
< const struct ggml_compute_params * params,
< struct ggml_tensor * dst) {
<
< const struct ggml_tensor * src0 = dst->src[0];
< const struct ggml_tensor * src1 = dst->src[1];
@cmdr2
cmdr2 / simple_addition_fp16.cpp
Created February 24, 2025 07:44
Add two float16 tensors using ggml. Each tensor takes 1 GB of memory.
#include "ggml.h"
#include "ggml-cpu.h"
#ifdef GGML_USE_CUDA
#include "ggml-cuda.h"
#endif
#include <vector>
#include <iostream>
#include <chrono>