Skip to content

Instantly share code, notes, and snippets.

@alifarazz
Last active June 4, 2025 21:34
Show Gist options
  • Save alifarazz/037a0f33b4fba15b6539b29a030d9132 to your computer and use it in GitHub Desktop.
Save alifarazz/037a0f33b4fba15b6539b29a030d9132 to your computer and use it in GitHub Desktop.
Beating Shortsy's A-star with Dijkstra
  • How to compile:

    • $ g++ -DBIG_INPUT -Ofast -mtune=native -march=native -pipe -funroll-loops -Wall -Wextra -Wcast-align -Wchar-subscripts -Wshadow -Wunused dijkstra.cc -o dijkstra -flto
      or
    • $ clang++ -stdlib=libc++ -DBIG_INPUT -Ofast -mtune=native -march=native -funroll-loops -Wall -Wextra -Wcast-align -Wchar-subscripts -Wshadow -Wunused dijkstra.cc -o dijkstra -flto
  • Results on a Intel Core i7-13620H, compiled with gcc, with 1024x1024 input:

    $ numactl --physcpubind=2 hyperfine -N -i -w 30 -r 100 './dijkstra'
    Benchmark 1: ./dijkstra
      Time (mean ± σ):       3.2 ms ±   0.0 ms    [User: 2.9 ms, System: 0.3 ms]
      Range (min … max):     3.2 ms …   3.4 ms    100 runs
  • Results on a AMD Ryzen 7 3750H, compiled with gcc, with 1024x1024 input:

    $ hyperfine -N -i -w 30 -r 100 './dijkstra'
    Benchmark 1: ./dijkstra
      Time (mean ± σ):      17.6 ms ±   1.2 ms    [User: 7.0 ms, System: 8.5 ms]
      Range (min … max):    15.4 ms …  19.8 ms    100 runs

  • I have pre-processed the input files to remove the newline at the end of each line:
    $ tr -d '\n' <AR0300SR_320x320.csv >AR0300SR_320x320.data
#include "dijkstra.hh"
#include <cstdint>
#include <cstdlib>
#include <cstring>
int8_t mark[HEIGHT][WIDTH];
uint dist[HEIGHT][WIDTH];
std::vector<Edge> alist;
std::vector<uint32_t> alist_idx;
enum FinalizationStatus : int8_t {
NotSeenAtAll = 0,
SeenAtLeastOnce,
Finalized
};
int main(int argc, char* argv[])
{
if (argc != 2) return -1;
const char* filepath = argv[1]; // "Cauldron_1024x1024_no_bs.data"
alist_idx.reserve(WIDTH * HEIGHT + 1);
alist.reserve(2946128); // |E| for 1024x1024
load_data(filepath);
const std::vector<VertexIdx> srcs { SRCS_INIT_LIST };
if constexpr (VERBOSE > 0) {
for (auto u : alist) {
std::cout << u.vid << ' ';
}
std::cout << std::endl;
for (auto u : alist) {
std::cout << (static_cast<int>(u.w) != 100) << ' ';
}
std::cout << std::endl;
for (auto u : alist_idx) {
std::cout << u << ' ';
}
std::cout << std::endl;
}
for (size_t i = 0; i < srcs.size(); i++) {
auto src = srcs[i];
std::printf("src: (%u, %u)\n", src.h, src.w);
bool first_time = true;
for (size_t j = 0; j < srcs.size(); j++) {
if (i != j) {
auto dest = srcs[j];
dijkstra(src, dest, first_time);
first_time = false;
std::printf("\tdst: (%u, %u):\t%i\n", dest.h, dest.w, mark[dest.h][dest.w] != NotSeenAtAll ? static_cast<int>(dist[dest.h][dest.w]) : -1);
}
}
if constexpr (VERBOSE > 1) {
for (int ii = 0; ii < HEIGHT; ii++) {
for (int j = 0; j < WIDTH; j++)
if (auto d = dist[j][ii]; d != UINT_MAX)
std::printf("%u\t", d);
else
std::printf("%i\t", -1);
std::puts("");
}
}
}
}
// TODO: make sure it's correct. [mostly done]
// TODO: do the directed graphs - undirected edges trick with min and max. could be pessimization
void load_data(const char* fpath)
{
int fd = open(fpath, O_RDONLY);
assert(fd != -1);
if (fd != -1) {
off_t file_size = get_file_size(fd);
if (file_size > 0) {
char* mem = static_cast<char*>(mmap(NULL, file_size, PROT_READ, MAP_PRIVATE, fd, 0));
if (mem) {
{
actually_load_data(mem);
}
assert(munmap(mem, file_size) == 0);
}
}
assert(close(fd) == 0);
}
}
void actually_load_data(const char* const mem)
{
// show_backtrace();
// raise(SIGTRAP);
auto point_is_within_box = [](int h, int w) -> int {
static_assert(sizeof(int) == 4, "sizeof(int) is not 4");
constexpr size_t mask_w = 0xFFFFFFFF - ((1 << LG_WIDTH) - 1);
constexpr size_t mask_h = 0xFFFFFFFF - ((1 << LG_HEIGHT) - 1);
if ((h & mask_h) | (w & mask_w)) // if (h >= HEIGHT || h < 0 || w >= WIDTH || w < 0) ret = false; else ret= true;
return false;
return true;
};
size_t memidx = 0;
for (int i = 0; i < HEIGHT; i++) {
for (int j = 0; j < WIDTH; j++) {
alist_idx.push_back(alist.size());
if (mem[memidx++] == '0')
continue;
constinit static std::array<std::pair<int, int>, 8> kernel = init_kernel();
#pragma GCC unroll 8
for (size_t ki = 0; ki < kernel.size(); ki++) {
const auto [ii, jj] = kernel[ki];
const auto ni = i + ii, nj = j + jj;
if (const auto neigh_idx = ni * WIDTH + nj;
point_is_within_box(ni, nj)) {
constexpr std::array<uint8_t, 3> vals { 0, 100, 141 };
const uint8_t w = vals[std::abs(ii) + std::abs(jj)];
if (mem[neigh_idx] == '1')
alist.emplace_back(static_cast<uint>(neigh_idx), w);
}
}
}
}
alist_idx.push_back(alist.size());
}
off_t get_file_size(int fd)
{
struct stat st;
if (fstat(fd, &st) < 0) {
perror("fstat");
return -1;
}
if (S_ISBLK(st.st_mode)) {
unsigned long long bytes;
if (ioctl(fd, BLKGETSIZE64, &bytes) != 0) {
perror("ioctl");
return -1;
}
return bytes;
} else if (S_ISREG(st.st_mode))
return st.st_size;
return -1;
}
void dijkstra(VertexIdx s, VertexIdx dest, bool clean_run = true)
{
static std::priority_queue<PQVertex, std::vector<PQVertex>, std::greater<PQVertex>> pq;
if (clean_run) {
std::memset(mark, 0, sizeof(FinalizationStatus) * WIDTH * HEIGHT); // Set all to NotSeenAtAll
pq = decltype(pq) {};
pq.push(std::make_pair(0u, s));
dist[s.h][s.w] = 0;
mark[s.h][s.w] = SeenAtLeastOnce;
}
if (mark[dest.h][dest.w] == Finalized) // already seen dest
return;
if constexpr (VERBOSE > 0)
std::printf("choosing h: %u, w: %u as source, with id: %u\n", s.h, s.w, s.id);
while (!pq.empty()) {
auto [W, v] = pq.top();
if (v.id == dest.id) // found dest, don't expand it yet
return;
pq.pop();
if (mark[v.h][v.w] == Finalized) {
continue;
}
assert(mark[v.h][v.w] == SeenAtLeastOnce);
mark[v.h][v.w] = Finalized;
if constexpr (VERBOSE > 1)
std::cout << "POPPED" << '\t' << v.h << ' ' << v.w << ' ' << std::endl;
for (size_t i = alist_idx[v.id]; i < alist_idx[v.id + 1]; i++) {
Edge& neigh = alist[i];
VertexIdx u { .id = neigh.vid };
if constexpr (VERBOSE > 1)
std::cout << "\t\t" << u.h << ' ' << u.w << ' ' << std::endl;
assert(u.h < HEIGHT);
assert(u.w < WIDTH);
switch (mark[u.h][u.w]) {
case SeenAtLeastOnce:
if (dist[u.h][u.w] > dist[v.h][v.w] + neigh.w) {
dist[u.h][u.w] = dist[v.h][v.w] + neigh.w;
pq.emplace(dist[u.h][u.w], u);
}
break;
case NotSeenAtAll:
mark[u.h][u.w] = SeenAtLeastOnce;
dist[u.h][u.w] = dist[v.h][v.w] + neigh.w;
pq.emplace(dist[u.h][u.w], u);
break;
}
}
}
}
#pragma once
#include <array>
#include <cassert>
#include <climits>
#include <cstdint>
#include <iostream>
#include <queue>
#include <vector>
extern "C" {
#include <fcntl.h>
#include <linux/fs.h> // BLKGETSIZE64
#include <sys/ioctl.h> // ioctl
#include <sys/mman.h> // mmap(), munmap()
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h> // close()
}
#if !defined(VERBOSE)
#define VERBOSE 0
#endif
#ifdef BIG_INPUT
#define LG_HEIGHT 10
#define LG_WIDTH 10
#define SRCS_INIT_LIST \
{ { 31, 632 } }, \
{ { 325, 189 } }, \
{ { 132, 848 } }, \
{ { 179, 978 } }, \
{ { 142, 964 } }, \
{ { 979, 159 } }, \
{ { 170, 983 } }, \
{ { 836, 642 } }, \
{ { 395, 995 } }, \
{ { 651, 333 } }, \
{ { 572, 1011 } }, \
{ \
{ \
898, 879 \
}, \
}
#else
#define LG_HEIGHT 7
#define LG_WIDTH 7
#define INPUT_FILENAME "AR0301SR_86x92_padded_128x128.data";
#define SRCS_INIT_LIST \
{ { 0, 0 } }, \
{ { 91, 85 } }, \
{ { 85, 85 } }, \
{ { 30, 85 } }, \
{ { 84, 2 } }, \
{ { 2, 39 } }, \
{ { 50, 50 } }, \
{ { 21, 62 } }, \
{ { 24, 72 } }, \
{ { 72, 24 } },
#endif
#define LG_SUM_WH (LG_HEIGHT + LG_WIDTH)
#define HEIGHT (1 << LG_HEIGHT)
#define WIDTH (1 << LG_WIDTH)
union VertexIdx {
struct {
uint16_t h : LG_HEIGHT;
uint16_t w : LG_WIDTH;
};
uint32_t id : LG_SUM_WH;
bool operator<(VertexIdx const& other) const
{
return this->id < other.id;
}
};
static_assert(sizeof(VertexIdx) == 4, "bad sizeof() for struct VertexIdx");
using PQVertex = std::pair<uint32_t, VertexIdx>; // w, vertex
// using Edge = std::pair<char, VertexIdx>;
struct Edge {
unsigned int vid : 24;
uint8_t w : 8;
Edge(uint id, uint8_t win)
: vid { id }
, w { win }
{
}
};
static_assert(sizeof(Edge) == 4, "bad sizeof() for struct Edge");
static void load_data(const char*);
static void dijkstra(VertexIdx s, VertexIdx, bool);
static off_t get_file_size(int);
static void actually_load_data(const char* const);
constexpr std::array<std::pair<int, int>, 8> init_kernel()
{
std::array<std::pair<int, int>, 8> ret;
int k = 0;
for (int ii = -1; ii < 2; ii++) {
for (int jj = -1; jj < 2; jj++) {
if (ii == 0 && jj == 0)
continue;
ret[k++] = std::make_pair(ii, jj);
}
}
return ret;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment