This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
streams: 1 | |
threads per block: 32 | |
blocks: 4096 | |
warp size: 64 | |
number of multiprocessors (CUs or WGPs): 120 | |
total threads: 131072 | |
max warps active: 4096 | |
max blocks active: 4096 | |
max threads per multiprocessor: 2560 | |
est. max warps per CU/WGP: 34.1333 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdlib.h> | |
#include <sys/mman.h> | |
/* Hard-coded table of small integer values, stored one per `char`.
 * NOTE(review): the code that reads this table is cut off in this excerpt,
 * so what the values represent cannot be confirmed from here. */
const char start[] = {1,3,4,1,1,1,1,1,1,1,1,2,2,1,4,2,4,1,1,1,1,1,5,4,1,1,2,1,1,1,1,4,1,1,1,4,4,1,1,1,1,1,1,1,2,4,1,3,1,1,2,1,2,1,1,4,1,1,1,4,3,1,3,1,5,1,1,3,4,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,5,2,5,5,3,2,1,5,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,5,1,1,1,1,5,1,1,1,1,1,4,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,3,1,2,4,1,5,5,1,1,5,3,4,4,4,1,1,1,2,1,1,1,1,1,1,2,1,1,1,1,1,1,5,3,1,4,1,1,2,2,1,2,2,5,1,1,1,2,1,1,1,1,3,4,5,1,2,1,1,1,1,1,5,2,1,1,1,1,1,1,5,1,1,1,1,1,1,1,5,1,4,1,5,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,5,4,5,1,1,1,1,1,1,1,5,1,1,3,1,1,1,3,1,4,2,1,5,1,3,5,5,2,1,3,1,1,1,1,1,3,1,3,1,1,2,4,3,1,4,2,2,1,1,1,1,1,1,1,5,2,1,1,1,2}; | |
size_t num_for(size_t x) { | |
char* mem = mmap(NULL, 1000000000000, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); | |
if (mem == MAP_FAILED) { | |
puts(":("); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const std = @import("std"); | |
const Vec3 = struct { | |
x: f32, | |
y: f32, | |
z: f32, | |
const up = Vec3{.x = 0, .y = 1, .z = 0}; | |
fn add(a: Vec3, b: Vec3) Vec3 { |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const std = @import("std"); | |
const Allocator = std.mem.Allocator; | |
// Top-level compile-time constants. The code that consumes them is not
// visible in this excerpt, so the notes below are limited to what the
// declarations themselves show.
// 1 << 20 == 1_048_576.
const workload_size = 1 << 20; | |
const n_workloads = 4; | |
const repetition = 4; | |
// NOTE(review): the two boolean switches below presumably select code paths
// further down the file; confirm against their uses.
const use_async = true; | |
const use_manual_prefetch = true; | |
const Cell = extern struct { |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const std = @import("std"); | |
const mem = std.mem; | |
const testing = std.testing; | |
const Allocator = mem.Allocator; | |
const ArenaAllocator = std.heap.ArenaAllocator; | |
const SegmentedList = std.SegmentedList; | |
pub const Attribute = struct { | |
name: []const u8, | |
value: []const u8 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const std = @import("std"); | |
const Log2Int = std.math.Log2Int; | |
const IntType = std.meta.IntType; | |
/// Extracts one radix digit from `value`.
///
/// Shifts `value` right by `shift` bits, keeps only the lowest `bits` bits,
/// and returns them as an integer of type `IntType(false, bits)`.
///
/// `T` is the integer type of `value`; `bits` is the comptime digit width and
/// `shift` is the bit offset of the digit inside `value`.
fn radix(comptime T: type, comptime bits: Log2Int(T), shift: Log2Int(T), value: T) IntType(false, bits) {
    const RadixType = IntType(false, bits);
    // Masking with the largest RadixType value first means the narrowing
    // @intCast below always receives a value that fits in RadixType.
    const digit_mask = std.math.maxInt(RadixType);
    return @intCast(RadixType, (value >> shift) & digit_mask);
}
fn partialRadixSort(comptime T: type, comptime bits: Log2Int(T), shift: Log2Int(T), src: []const T, dst: []T) void { |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Finds factors of 1 + 11! + (11!)! using CUDA | |
// The candidate primes to test as factors of the above number
// must be supplied to the program one per line.
// See https://cr.yp.to/primegen.html for generating primes. | |
// Compile with `nvcc -o fact fact.cu`. | |
// Run with `./fact ./primes.txt` | |
#include <vector> | |
#include <fstream> | |
#include <iostream> |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <array>
#include <cstddef>
#include <iterator>
#include <type_traits>
/// Alias for `T` with any reference removed and then any top-level
/// `const`/`volatile` qualifier removed, e.g. `const int&` -> `int`.
/// Qualifiers on a pointee are untouched: `const char*` stays `const char*`.
/// Built from std::remove_reference_t and std::remove_cv_t (<type_traits>).
template <typename T>
using remove_cvref = std::remove_cv_t<std::remove_reference_t<T>>;
template <typename T, T... values> | |
struct Seq { |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
GPU: RX 580 (8 GB) | |
vulkan: 1.1.107 | |
total rays: 311040000 | |
total render time: 18540.1 | |
total mray/s: 16.7766 | |
average fps: 8.07517 | |
frames: 150 | |
# Frame number: total rays, outputs, total render time, max render time, min render time, mray/s | |
frame 0: 2073600 rays, 1, 77.8995 ms, 77.8995 ms, 77.8995 ms, 26.6189 mray/s | |
frame 1: 2073600 rays, 1, 77.8482 ms, 77.8482 ms, 77.8482 ms, 26.6365 mray/s |