Skip to content

Instantly share code, notes, and snippets.

@kerrytazi
Last active April 2, 2025 03:17
Show Gist options
  • Save kerrytazi/72e58193de6ecfccd96280f626fb3c90 to your computer and use it in GitHub Desktop.
Save kerrytazi/72e58193de6ecfccd96280f626fb3c90 to your computer and use it in GitHub Desktop.
My implementation of A* path finding algorithm. Heavily optimized.
#pragma once
#include <cstddef>
#include <cstdint>
#include <type_traits>
#include <utility>
#include <vector>
#include <memory>
#include <algorithm>
#include <cmath>
//#define DEBUG_ASTAR
#ifdef DEBUG_ASTAR
#include <cstdio>
#endif // DEBUG_ASTAR
namespace astar
{
namespace _detail
{
// https://stackoverflow.com/a/70550680
static constexpr const uint8_t clz_tab[32] =
{
31, 22, 30, 21, 18, 10, 29, 2, 20, 17, 15, 13, 9, 6, 28, 1,
23, 19, 11, 3, 16, 14, 7, 24, 12, 4, 8, 25, 5, 26, 27, 0
};
constexpr int clz32(uint32_t a)
{
if (std::is_constant_evaluated())
{
a |= a >> 16;
a |= a >> 8;
a |= a >> 4;
a |= a >> 2;
a |= a >> 1;
return clz_tab[0x07c4acdd * a >> 27];
}
else
{
#ifdef _MSC_VER
return __lzcnt(a);
#else
return __builtin_clz(a);
#endif //
}
}
// isqrt32_tab[k] = isqrt(256*(k+64)-1) for 0 <= k < 192
static constexpr const uint8_t isqrt32_tab[192] =
{
127, 128, 129, 130, 131, 132, 133, 134, 135, 136,
137, 138, 139, 140, 141, 142, 143, 143, 144, 145,
146, 147, 148, 149, 150, 150, 151, 152, 153, 154,
155, 155, 156, 157, 158, 159, 159, 160, 161, 162,
163, 163, 164, 165, 166, 167, 167, 168, 169, 170,
170, 171, 172, 173, 173, 174, 175, 175, 176, 177,
178, 178, 179, 180, 181, 181, 182, 183, 183, 184,
185, 185, 186, 187, 187, 188, 189, 189, 190, 191,
191, 192, 193, 193, 194, 195, 195, 196, 197, 197,
198, 199, 199, 200, 201, 201, 202, 203, 203, 204,
204, 205, 206, 206, 207, 207, 208, 209, 209, 210,
211, 211, 212, 212, 213, 214, 214, 215, 215, 216,
217, 217, 218, 218, 219, 219, 220, 221, 221, 222,
222, 223, 223, 224, 225, 225, 226, 226, 227, 227,
228, 229, 229, 230, 230, 231, 231, 232, 232, 233,
234, 234, 235, 235, 236, 236, 237, 237, 238, 238,
239, 239, 240, 241, 241, 242, 242, 243, 243, 244,
244, 245, 245, 246, 246, 247, 247, 248, 248, 249,
249, 250, 250, 251, 251, 252, 252, 253, 253, 254,
254, 255,
};
// integer square root of 32-bit unsigned integer
constexpr uint16_t isqrt32(uint32_t x)
{
if (x == 0) return 0;
int lz = clz32(x) & 30;
x <<= lz;
uint16_t y = 1 + isqrt32_tab[(x >> 24) - 64];
y = (y << 7) + (x >> 9) / y;
y -= x < (uint32_t)y * y;
return y >> (lz >> 1);
}
} // namespace _detail
struct Vec2
{
uint32_t x;
uint32_t y;
constexpr Vec2 &operator+=(Vec2 const &other)
{
x += other.x;
y += other.y;
return *this;
}
[[nodiscard]]
constexpr Vec2 operator+(Vec2 const &other) const
{
return Vec2(*this) += other;
}
constexpr Vec2 &operator-=(Vec2 const &other)
{
x -= other.x;
y -= other.y;
return *this;
}
[[nodiscard]]
constexpr Vec2 operator-(Vec2 const &other) const
{
return Vec2(*this) -= other;
}
[[nodiscard]]
constexpr bool operator==(Vec2 const &other) const
{
return x == other.x && y == other.y;
}
[[nodiscard]]
constexpr bool operator!=(Vec2 const &other) const
{
return !operator==(other);
}
[[nodiscard]]
constexpr static uint32_t distance_squared(Vec2 const &pos1, Vec2 const &pos2)
{
auto x = pos1.x - pos2.x;
auto y = pos1.y - pos2.y;
return x*x + y*y;
}
[[nodiscard]]
constexpr uint32_t distance_squared(Vec2 const &other) const
{
return distance_squared(*this, other);
}
[[nodiscard]]
constexpr static uint16_t distance16(Vec2 const &pos1, Vec2 const &pos2)
{
// it is not correct math, but it is faster and still pretty accurate
return _detail::isqrt32(distance_squared(pos1, pos2) * 16);
}
[[nodiscard]]
constexpr uint16_t distance16(Vec2 const &other) const
{
return distance16(*this, other);
}
};
enum class PathFindingResult
{
NeedMoreWork,
NotFound,
Found,
};
class State
{
private:
enum class CellChecked : uint32_t
{
NotChecked,
CheckedEmpty,
CheckedWall,
};
struct Cell
{
Vec2 from;
uint16_t distance_from;
uint16_t distance;
CellChecked checked;
};
struct PosWithDistance : Vec2
{
uint32_t distance;
};
uint8_t const *field_ = nullptr;
Vec2 field_size_ = {};
Vec2 start_pos_ = {};
Vec2 finish_pos_ = {};
std::unique_ptr<Cell[]> cells_;
std::vector<PosWithDistance> cells_to_check_;
bool search_started_ = false;
bool found_path_ = false;
enum class CheckResult
{
NotFound,
Found,
Recheck,
};
[[nodiscard]]
bool check_neighbour(Vec2 const &new_cell_pos, Vec2 const &from, uint32_t new_distance_from)
{
// if caller guarantee walls around. we can skip bounds check
if (new_cell_pos.x >= field_size_.x || new_cell_pos.y >= field_size_.y)
return false;
Cell &cell = cells_[new_cell_pos.y * field_size_.x + new_cell_pos.x];
if (new_cell_pos == finish_pos_)
{
cell.from = from;
cell.distance_from = new_distance_from;
found_path_ = true;
cells_to_check_.clear();
return true;
}
if (cell.checked == CellChecked::CheckedWall)
return false;
if (cell.checked == CellChecked::CheckedEmpty && cell.distance_from <= new_distance_from)
return false;
if (field_[new_cell_pos.y * field_size_.x + new_cell_pos.x] != 0)
{
cell.checked = CellChecked::CheckedWall;
return false;
}
else
{
if (cell.checked != CellChecked::CheckedEmpty)
{
cell.distance = new_distance_from + new_cell_pos.distance16(finish_pos_);
cell.checked = CellChecked::CheckedEmpty;
}
cell.from = from;
cell.distance_from = new_distance_from;
}
auto it = std::lower_bound(cells_to_check_.begin(), cells_to_check_.end(), cell.distance, [](PosWithDistance const &pos, uint32_t new_distance) {
return pos.distance > new_distance;
});
cells_to_check_.insert(it, { new_cell_pos, cell.distance });
return false;
}
[[nodiscard]]
bool check_cell(Vec2 const &pos)
{
Cell &cell = cells_[pos.y * field_size_.x + pos.x];
// new_cell_pos.distance16(from)
constexpr uint32_t straight_distance = Vec2{ 0, 1 }.distance16({ 0, 0 });
constexpr uint32_t diagonal_distance = Vec2{ 1, 1 }.distance16({ 0, 0 });
if (check_neighbour({ pos.x - 1, pos.y - 1 }, pos, cell.distance_from + diagonal_distance))
return true;
if (check_neighbour({ pos.x - 1, pos.y }, pos, cell.distance_from + straight_distance))
return true;
if (check_neighbour({ pos.x - 1, pos.y + 1 }, pos, cell.distance_from + diagonal_distance))
return true;
if (check_neighbour({ pos.x , pos.y - 1 }, pos, cell.distance_from + straight_distance))
return true;
if (check_neighbour({ pos.x , pos.y + 1 }, pos, cell.distance_from + straight_distance))
return true;
if (check_neighbour({ pos.x + 1, pos.y - 1 }, pos, cell.distance_from + diagonal_distance))
return true;
if (check_neighbour({ pos.x + 1, pos.y }, pos, cell.distance_from + straight_distance))
return true;
if (check_neighbour({ pos.x + 1, pos.y + 1 }, pos, cell.distance_from + diagonal_distance))
return true;
#ifdef DEBUG_ASTAR
{
for (uint32_t y = 140; y < 190; ++y)
{
for (uint32_t x = 290; x < 340; ++x)
{
if (start_pos_ == Vec2{ x, y })
{
printf("@ ");
}
else
if (finish_pos_ == Vec2{ x, y })
{
printf("$ ");
}
else
if (pos == Vec2{ x, y })
{
printf("%% ");
}
else
if (cells_[y * field_size_.x + x].checked == CellChecked::NotChecked)
{
if (field_[y * field_size_.x + x])
printf("# ");
else
printf("_ ");
}
else
{
printf("%d ", (int)cells_[y * field_size_.x + x].checked);
}
}
printf("\n");
}
printf("\n");
int a = 0;
}
#endif // DEBUG_ASTAR
return false;
}
public:
State(uint8_t const *field, Vec2 field_size)
: field_(field)
, field_size_(field_size)
, cells_(std::make_unique<Cell[]>(field_size.x * field_size.y))
{
}
void start_search(Vec2 start_pos, Vec2 finish_pos)
{
start_pos_ = start_pos;
finish_pos_ = finish_pos;
cells_to_check_.clear();
cells_to_check_.reserve((field_size_.x + field_size_.y) * 10);
for (uint32_t i = 0; i < field_size_.x * field_size_.y; ++i)
cells_[i] = {};
if (start_pos_ == finish_pos_)
{
search_started_ = true;
found_path_ = true;
return;
}
if (field_[start_pos_.y * field_size_.x + start_pos_.x] != 0 || field_[finish_pos_.y * field_size_.x + finish_pos_.x] != 0)
{
search_started_ = true;
found_path_ = false;
return;
}
cells_to_check_.push_back({ start_pos_, start_pos_.distance16(finish_pos_) });
cells_[start_pos_.y * field_size_.x + start_pos_.x].checked = CellChecked::CheckedEmpty;
search_started_ = true;
found_path_ = false;
}
[[nodiscard]]
PathFindingResult iter(uint32_t times = 1)
{
while (times--)
{
if (cells_to_check_.empty())
return found_path_ ? PathFindingResult::Found : PathFindingResult::NotFound;
Vec2 pos = cells_to_check_.back();
cells_to_check_.pop_back();
if (check_cell(pos))
return PathFindingResult::Found;
}
return PathFindingResult::NeedMoreWork;
}
[[nodiscard]]
std::vector<Vec2> get_result() const
{
if (!search_started_ || !found_path_)
return {};
Vec2 pos = finish_pos_;
Cell *cell = &cells_[finish_pos_.y * field_size_.x + finish_pos_.x];
std::vector<Vec2> result;
result.reserve(cell->distance_from / 4);
while (pos != start_pos_)
{
result.push_back(pos);
pos = cell->from;
cell = &cells_[cell->from.y * field_size_.x + cell->from.x];
}
result.push_back(pos);
std::reverse(result.begin(), result.end());
return result;
}
};
namespace helper
{
inline std::pair<bool, Vec2> raycast(uint8_t const *field, Vec2 field_size, Vec2 from, Vec2 to)
{
// https://lodev.org/cgtutor/raycasting.html
struct Vec2f
{
float x;
float y;
constexpr Vec2f &operator-=(Vec2f const &other)
{
x -= other.x;
y -= other.y;
return *this;
}
[[nodiscard]]
constexpr Vec2f operator-(Vec2f const &other) const
{
return Vec2f(*this) -= other;
}
[[nodiscard]]
constexpr static float distance_squared(Vec2f const &pos1, Vec2f const &pos2)
{
auto x = pos1.x - pos2.x;
auto y = pos1.y - pos2.y;
return x*x + y*y;
}
[[nodiscard]]
constexpr float distance_squared() const
{
return x*x + y*y;
}
[[nodiscard]]
float distance() const
{
return sqrtf(distance_squared());
}
[[nodiscard]]
constexpr float distance_squared(Vec2f const &other) const
{
return distance_squared(*this, other);
}
[[nodiscard]]
float distance(Vec2f const &other) const
{
return sqrtf(distance_squared(other));
}
[[nodiscard]]
Vec2f norm() const
{
float mult = 1.0f / distance();
return { x * mult, y * mult };
}
};
Vec2f vRay = Vec2f{ float(to.x), float(to.y) } - Vec2f{ float(from.x), float(from.y) };
float vRayDistance = vRay.distance();
Vec2f vRayDir = vRay.norm();
Vec2f vRayUnitStepSize = { sqrtf(1.0f + (vRayDir.y / vRayDir.x) * (vRayDir.y / vRayDir.x)), sqrtf(1.0f + (vRayDir.x / vRayDir.y) * (vRayDir.x / vRayDir.y)) };
Vec2 vMapCheck = from;
Vec2 vStep;
Vec2f vRayLength1D;
if (vRayDir.x < 0.0f)
{
vStep.x = (uint32_t)-1;
vRayLength1D.x = (from.x - float(vMapCheck.x)) * vRayUnitStepSize.x;
}
else
{
vStep.x = 1;
vRayLength1D.x = (float(vMapCheck.x + 1) - from.x) * vRayUnitStepSize.x;
}
if (vRayDir.y < 0.0f)
{
vStep.y = (uint32_t)-1;
vRayLength1D.y = (from.y - float(vMapCheck.y)) * vRayUnitStepSize.y;
}
else
{
vStep.y = 1;
vRayLength1D.y = (float(vMapCheck.y + 1) - from.y) * vRayUnitStepSize.y;
}
bool bTileFound = false;
float fMaxDistance = vRayDistance;
float fDistance = 0.0f;
while (!bTileFound && fDistance < fMaxDistance)
{
if (vRayLength1D.x < vRayLength1D.y)
{
vMapCheck.x += vStep.x;
fDistance = vRayLength1D.x;
vRayLength1D.x += vRayUnitStepSize.x;
}
else
{
vMapCheck.y += vStep.y;
fDistance = vRayLength1D.y;
vRayLength1D.y += vRayUnitStepSize.y;
}
if (vMapCheck.x < field_size.x && vMapCheck.y < field_size.y)
{
if (field[vMapCheck.y * field_size.x + vMapCheck.x] != 0)
{
return { true, { (uint32_t)vMapCheck.x, (uint32_t)vMapCheck.y } };
}
}
}
return { false, {} };
}
inline void minimize(std::vector<Vec2> &arr, uint32_t max_remove = (uint32_t)-1)
{
if (arr.size() < 3)
return;
uint32_t remove_counter = 0;
Vec2 last_dir = arr[0] - arr[1];
for (auto it = arr.begin() + 1; it != arr.end() - 1;)
{
Vec2 c1 = *(it );
Vec2 c2 = *(it + 1);
if (last_dir == c1 - c2 && remove_counter < max_remove)
{
it = arr.erase(it);
++remove_counter;
}
else
{
last_dir = c1 - c2;
remove_counter = 0;
++it;
}
}
}
inline void optimize_diagonals(uint8_t const *field, Vec2 field_size, std::vector<Vec2> &arr)
{
if (arr.size() < 3)
return;
for (auto it = arr.begin() + 1; it != arr.end() - 1;)
{
Vec2 c0 = *(it - 1);
Vec2 c2 = *(it + 1);
if (!raycast(field, field_size, c2, c0).first) // backwards faster?
{
it = arr.erase(it);
}
else
{
++it;
}
}
}
inline void greedy_optimize_diagonals(uint8_t const *field, Vec2 field_size, std::vector<Vec2> &arr)
{
if (arr.size() < 3)
return;
const auto raycast4 = [&](Vec2 from, Vec2 to) -> bool {
return
raycast(field, field_size, { from.x + 0, from.y + 0}, { to.x + 0, to.y + 0}).first &&
raycast(field, field_size, { from.x + 1, from.y + 0}, { to.x + 1, to.y + 0}).first &&
raycast(field, field_size, { from.x + 0, from.y + 1}, { to.x + 0, to.y + 1}).first &&
raycast(field, field_size, { from.x + 1, from.y + 1}, { to.x + 1, to.y + 1}).first;
};
for (size_t from = 0, to = arr.size() - 1; from < arr.size() - 1;)
{
if (from >= to - 1)
{
++from;
to = arr.size() - 1;
continue;
}
if (!raycast4(arr[from], arr[to]))
{
arr.erase(arr.begin() + from + 1, arr.begin() + to);
++from;
to = arr.size() - 1;
}
else
{
--to;
}
}
}
inline std::pair<bool, Vec2> search_nearest_empty(uint8_t const *field, Vec2 field_size, Vec2 pos)
{
const auto check = [&](int32_t x, int32_t y) -> bool {
return field[y * field_size.x + x] == 0;
};
int32_t pos_x = pos.x;
int32_t pos_y = pos.y;
int32_t box_left = pos_x - 1;
int32_t box_top = pos_y - 1;
int32_t box_right = pos_x + 1;
int32_t box_bottom = pos_y + 1;
for (; (uint32_t)pos_x < field_size.x && (uint32_t)pos_y < field_size.y;)
{
for (; pos_x > box_left; --pos_x)
if (check(pos_x, pos_y))
goto found;
for (; pos_y > box_top; --pos_y)
if (check(pos_x, pos_y))
goto found;
for (; pos_x < box_right; ++pos_x)
if (check(pos_x, pos_y))
goto found;
for (; pos_y < box_bottom; ++pos_y)
if (check(pos_x, pos_y))
goto found;
--box_left;
--box_top;
++box_right;
++box_bottom;
}
return { false, {} };
found:
return { true, { (uint32_t)pos_x, (uint32_t)pos_y } };
}
inline void thicken_walls(uint8_t const *old_field, uint8_t *new_field, Vec2 field_size)
{
for (uint32_t y = 0; y < field_size.y; ++y)
{
for (uint32_t x = 0; x < field_size.x; ++x)
{
const auto &check_neighbour = [&](uint32_t xx, uint32_t yy) {
if (xx >= field_size.x || yy >= field_size.y)
return;
if (old_field[y * field_size.x + x])
new_field[yy * field_size.x + xx] = 1;
};
check_neighbour(x - 1, y - 1);
check_neighbour(x - 1, y );
check_neighbour(x - 1, y + 1);
check_neighbour(x , y - 1);
check_neighbour(x , y );
check_neighbour(x , y + 1);
check_neighbour(x + 1, y - 1);
check_neighbour(x + 1, y );
check_neighbour(x + 1, y + 1);
}
}
}
} // namespace helper
} // namespace astar
/*
void example()
{
astar::Vec2 field_size = { 5, 5 };
uint8_t *field = new uint8_t[field_size.x * field_size.y];
astar::State pathfinder(field, field_size);
pathfinder.start_search({ 0, 0 }, { 4, 4 });
// can run partially between frames
for (;;)
{
auto iter_result = pathfinder.iter(100); // Can pass -1 to search infinitely
if (iter_result == astar::PathFindingResult::Found)
break;
if (iter_result == astar::PathFindingResult::NeedMoreWork)
continue;
if (iter_result == astar::PathFindingResult::NotFound)
throw 1; // No path from 'start' to 'finish' found
}
auto path = pathfinder.get_result();
astar::helper::minimize(path);
astar::helper::optimize_diagonals(field, field_size, path);
astar::helper::greedy_optimize_diagonals(field, field_size, path);
}
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment