Pathfinding
Pathfinding on 2D weighted grids
Quick Start
import { createPathfinder } from './webgpu-market/pathfinding/pathfinding';
const pathfinder = createPathfinder(device, { width: 256, height: 256 });
// Set terrain costs (negative = impassable)
pathfinder.setCostGrid(costs);
const result = await pathfinder.findPath({
startX: 10, startY: 10,
endX: 240, endY: 240,
});
if (result.found) {
console.log(result.path, result.distance);
}
pathfinder.destroy();
Source
// GPU Pathfinding — Wavefront expansion (parallel Dijkstra)
//
// Finds shortest paths on a 2D weighted grid. Each iteration,
// all cells at the current cost frontier are expanded in parallel.
//
// Three kernels:
// 1. expand — propagate frontier cells to their neighbours
// 2. reconstruct — deterministically assign parent pointers from converged distances
// 3. trace — backtrack from end to start using parent pointers
//
// Note: grid_size × max_terrain_cost must be below ~4,000,000
// due to fixed-point u32 distance representation.
// Per-dispatch parameters, written by the host before each findPath run.
// Layout must stay in sync with UNIFORM_SIZE on the TypeScript side
// (eight u32 words = 32 bytes; the two pads keep the size at 32).
struct Uniforms {
grid_width: u32,
grid_height: u32,
start_x: u32,
start_y: u32,
end_x: u32,
end_y: u32,
_pad0: u32,
_pad1: u32,
}
// Bind group 0 — bindings 0-4 are shared by the expand and reconstruct
// kernels; trace additionally uses bindings 5-6 declared further below.
@group(0) @binding(0) var<uniform> u: Uniforms;
// Per-cell terrain weight; a negative value marks the cell impassable.
@group(0) @binding(1) var<storage, read> cost_grid: array<f32>;
// Shortest-distance field in fixed-point u32; atomic so many threads can
// relax the same cell concurrently via atomicMin.
@group(0) @binding(2) var<storage, read_write> dist: array<atomic<u32>>;
// Per-cell parent index (-1 = none); written only by reconstruct.
@group(0) @binding(3) var<storage, read_write> parent: array<i32>;
@group(0) @binding(4) var<storage, read_write> frontier_flag: array<atomic<u32>>;
// frontier_flag[0] = 1 if any cell was updated this iteration (continue expanding)
// Fixed-point u32 distances: float * SCALE.
// Max representable distance: ~4,294,967 cost units.
const INF: u32 = 0xFFFFFFFFu;
const SCALE: f32 = 1000.0;
// Flatten (x, y) grid coordinates into a row-major linear cell index.
fn cell_index(x: u32, y: u32) -> u32 {
    let row_offset = y * u.grid_width;
    return row_offset + x;
}
// Convert a float cost to the fixed-point u32 representation used by the
// atomic distance field (truncating, same conversion everywhere so sums
// compare exactly).
fn f32_to_dist(d: f32) -> u32 {
    let scaled = d * SCALE;
    return u32(scaled);
}
// 4-connected neighbours (up, down, left, right)
// Indexed in lockstep: neighbour i of (x, y) is (x + DX[i], y + DY[i]).
const DX: array<i32, 4> = array<i32, 4>(0, 0, -1, 1);
const DY: array<i32, 4> = array<i32, 4>(-1, 1, 0, 0);
// ============================================================
// Kernel 1: Expand frontier
// Each thread processes one grid cell. If its distance is not INF,
// propagate to its neighbours using atomicMin.
// No parent writes here — that's done in a separate pass after
// convergence to avoid race conditions.
// ============================================================
@compute @workgroup_size(16, 16)
fn expand(@builtin(global_invocation_id) gid: vec3u) {
// Guard threads dispatched past the grid edge (dispatch is rounded up).
if (gid.x >= u.grid_width || gid.y >= u.grid_height) {
return;
}
let idx = cell_index(gid.x, gid.y);
let my_dist = atomicLoad(&dist[idx]);
// Cells not yet reached by any wavefront have nothing to propagate.
if (my_dist == INF) {
return;
}
for (var i = 0u; i < 4u; i++) {
let nx = i32(gid.x) + DX[i];
let ny = i32(gid.y) + DY[i];
// Skip neighbours that fall outside the grid.
if (nx < 0 || nx >= i32(u.grid_width) || ny < 0 || ny >= i32(u.grid_height)) {
continue;
}
let nidx = cell_index(u32(nx), u32(ny));
let terrain_cost = cost_grid[nidx];
// Negative cost = impassable wall; never relax into it.
if (terrain_cost < 0.0) {
continue;
}
// Cost model: entering a cell pays that cell's terrain cost.
let new_dist = my_dist + f32_to_dist(terrain_cost);
// atomicMin returns the previous value, so new_dist < old_dist tells
// us whether this thread actually improved the neighbour's distance.
let old_dist = atomicMin(&dist[nidx], new_dist);
if (new_dist < old_dist) {
// Any improvement anywhere means the host must run another batch.
atomicStore(&frontier_flag[0], 1u);
}
}
}
// ============================================================
// Kernel 2: Reconstruct parent pointers
// Runs once after the distance field has converged. For each cell,
// find the neighbour whose dist + edge_cost == my_dist. This is
// deterministic and race-free since distances are final.
// ============================================================
@compute @workgroup_size(16, 16)
fn reconstruct(@builtin(global_invocation_id) gid: vec3u) {
// Guard threads dispatched past the grid edge.
if (gid.x >= u.grid_width || gid.y >= u.grid_height) {
return;
}
let idx = cell_index(gid.x, gid.y);
let my_dist = atomicLoad(&dist[idx]);
// Start cell or unreachable — no parent
if (my_dist == 0u || my_dist == INF) {
return;
}
let my_cost = cost_grid[idx];
// Impassable cells never received a finite distance; nothing to do.
if (my_cost < 0.0) {
return;
}
// Safe: my_dist >= f32_to_dist(my_cost) always holds because a cell's
// distance is the sum of all terrain costs along its shortest path,
// which includes at least my_cost as the final step.
// Exact match works because expand used the same f32_to_dist conversion.
let expected = my_dist - f32_to_dist(my_cost);
// Scan neighbours in fixed DX/DY order so ties resolve deterministically.
for (var i = 0u; i < 4u; i++) {
let nx = i32(gid.x) + DX[i];
let ny = i32(gid.y) + DY[i];
if (nx < 0 || nx >= i32(u.grid_width) || ny < 0 || ny >= i32(u.grid_height)) {
continue;
}
let nidx = cell_index(u32(nx), u32(ny));
if (atomicLoad(&dist[nidx]) == expected) {
parent[idx] = i32(nidx);
return;
}
}
}
// ============================================================
// Kernel 3: Trace path
// Backtrack from end to start using parent pointers.
// ============================================================
// path_out receives cell indices in end→start order; the host reverses.
@group(0) @binding(5) var<storage, read_write> path_out: array<u32>;
// path_length[0] = number of entries written to path_out (0 = no path).
@group(0) @binding(6) var<storage, read_write> path_length: array<atomic<u32>>;
// Single-thread backtrack from the end cell along parent pointers.
// Stores the number of path cells in path_length[0]; leaves it at 0
// (the host-reset value) when no complete end→start path exists.
//
// Fix: previously a broken parent chain (parent == -1 mid-chain, or the
// max_path safety cap being hit — both only possible when the distance
// field has not converged within maxIterations) still stored a nonzero
// length, so the host reported a partial path as "found". Now the length
// is stored only when the walk actually reached the start cell.
@compute @workgroup_size(1)
fn trace() {
    let end_idx = cell_index(u.end_x, u.end_y);
    let start_idx = cell_index(u.start_x, u.start_y);
    // End never reached — report nothing (path_length stays 0).
    if (atomicLoad(&dist[end_idx]) == INF) {
        return;
    }
    var current = i32(end_idx);
    var length = 0u;
    // A simple path can visit at most every cell once; anything longer
    // indicates a corrupt parent chain.
    let max_path = u.grid_width * u.grid_height;
    var reached_start = false;
    loop {
        if (length >= max_path) { break; }
        path_out[length] = u32(current);
        length++;
        if (u32(current) == start_idx) {
            reached_start = true;
            break;
        }
        current = parent[u32(current)];
        if (current < 0) { break; }
    }
    // Only publish complete paths; a broken chain must read as not-found.
    if (reached_start) {
        atomicStore(&path_length[0], length);
    }
}
// GPU Pathfinding
// Finds shortest paths on a 2D weighted grid using wavefront expansion
// (parallel Dijkstra). Each iteration expands all frontier cells simultaneously.
//
// Default WGSL loading uses a ?raw import (works with Vite, esbuild, Webpack).
// Alternative: load via fetch — see README.md for details.
import shaderSource from './pathfinding.wgsl?raw';
// Grid dimensions for a pathfinder instance (fixed at creation time).
export interface PathfinderOptions {
width: number;
height: number;
}
// One shortest-path query. Coordinates are cell indices in [0, width) ×
// [0, height). maxIterations caps wavefront expansion (default width + height).
export interface FindPathOptions {
startX: number;
startY: number;
endX: number;
endY: number;
maxIterations?: number;
}
// Result of findPath: cell indices from start to end, total path cost,
// and whether a path exists. On failure path is [] and distance Infinity.
export interface PathResult {
path: number[];
distance: number;
found: boolean;
}
// Public surface returned by createPathfinder.
export interface Pathfinder {
setCostGrid(costs: Float32Array): void;
findPath(options: FindPathOptions): Promise<PathResult>;
getDistanceField(): Promise<Float32Array>;
destroy(): void;
}
const EXPAND_WORKGROUP = 16; // must match @workgroup_size in the WGSL
const INF_U32 = 0xffffffff; // matches the shader's INF sentinel
const SCALE = 1000.0; // fixed-point scale; matches the shader's SCALE
const BATCH_SIZE = 8; // Expand iterations between readbacks
// Uniform buffer: u32 grid_width, u32 grid_height, u32 start_x, u32 start_y,
// u32 end_x, u32 end_y, u32 _pad0, u32 _pad1 = 32 bytes (matches the WGSL
// Uniforms struct; the trailing words are padding, not a max_cost field)
const UNIFORM_SIZE = 32;
/**
 * Create a GPU pathfinder for a fixed-size 2D weighted grid.
 *
 * Runs wavefront expansion (parallel Dijkstra): batches of `expand`
 * dispatches relax the fixed-point distance field until no cell improves,
 * then `reconstruct` derives parent pointers and `trace` walks the path.
 *
 * @param device  WebGPU device; the caller retains ownership.
 * @param options Grid dimensions (cells). All buffers are sized from these.
 * @returns A Pathfinder whose methods serialize internally — concurrent
 *          findPath/getDistanceField calls are queued, not interleaved.
 */
export function createPathfinder(device: GPUDevice, options: PathfinderOptions): Pathfinder {
  const { width, height } = options;
  const cellCount = width * height;
  const shaderModule = device.createShaderModule({ code: shaderSource });

  // --- GPU buffers ---------------------------------------------------------
  // Cost grid — terrain weights. Negative = impassable.
  const costGridBuffer = device.createBuffer({
    size: cellCount * 4,
    usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST
  });
  // Distance field — fixed-point u32 (float * SCALE) so the shader can atomicMin.
  const distBuffer = device.createBuffer({
    size: cellCount * 4,
    usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST
  });
  // Parent pointers for path reconstruction (-1 = no parent).
  const parentBuffer = device.createBuffer({
    size: cellCount * 4,
    usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST
  });
  // Frontier flag — shader sets [0] = 1 when any distance improved in a batch.
  const frontierFlagBuffer = device.createBuffer({
    size: 4,
    usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST
  });
  // Path output (written end→start by trace; reversed on the CPU).
  const pathBuffer = device.createBuffer({
    size: cellCount * 4,
    usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC
  });
  // Path length counter (0 = no path found).
  const pathLengthBuffer = device.createBuffer({
    size: 4,
    usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST
  });
  const uniformBuffer = device.createBuffer({
    size: UNIFORM_SIZE,
    usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST
  });

  // --- MAP_READ staging buffers (storage buffers cannot be mapped directly) --
  const flagReadback = device.createBuffer({
    size: 4,
    usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST
  });
  const pathLengthReadback = device.createBuffer({
    size: 4,
    usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST
  });
  const pathReadback = device.createBuffer({
    size: cellCount * 4,
    usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST
  });
  const distReadback = device.createBuffer({
    size: cellCount * 4,
    usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST
  });
  // Separate readback for getDistanceField to avoid conflicts with findPath
  const distFieldReadback = device.createBuffer({
    size: cellCount * 4,
    usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST
  });

  // --- Pipelines -----------------------------------------------------------
  // expand and reconstruct share bindings 0-4; trace adds 5-6.
  const expandBindGroupLayout = device.createBindGroupLayout({
    entries: [
      { binding: 0, visibility: GPUShaderStage.COMPUTE, buffer: { type: 'uniform' } },
      { binding: 1, visibility: GPUShaderStage.COMPUTE, buffer: { type: 'read-only-storage' } },
      { binding: 2, visibility: GPUShaderStage.COMPUTE, buffer: { type: 'storage' } },
      { binding: 3, visibility: GPUShaderStage.COMPUTE, buffer: { type: 'storage' } },
      { binding: 4, visibility: GPUShaderStage.COMPUTE, buffer: { type: 'storage' } }
    ]
  });
  const traceBindGroupLayout = device.createBindGroupLayout({
    entries: [
      { binding: 0, visibility: GPUShaderStage.COMPUTE, buffer: { type: 'uniform' } },
      { binding: 1, visibility: GPUShaderStage.COMPUTE, buffer: { type: 'read-only-storage' } },
      { binding: 2, visibility: GPUShaderStage.COMPUTE, buffer: { type: 'storage' } },
      { binding: 3, visibility: GPUShaderStage.COMPUTE, buffer: { type: 'storage' } },
      { binding: 4, visibility: GPUShaderStage.COMPUTE, buffer: { type: 'storage' } },
      { binding: 5, visibility: GPUShaderStage.COMPUTE, buffer: { type: 'storage' } },
      { binding: 6, visibility: GPUShaderStage.COMPUTE, buffer: { type: 'storage' } }
    ]
  });
  const expandPipeline = device.createComputePipeline({
    layout: device.createPipelineLayout({ bindGroupLayouts: [expandBindGroupLayout] }),
    compute: { module: shaderModule, entryPoint: 'expand' }
  });
  const reconstructPipeline = device.createComputePipeline({
    layout: device.createPipelineLayout({ bindGroupLayouts: [expandBindGroupLayout] }),
    compute: { module: shaderModule, entryPoint: 'reconstruct' }
  });
  const tracePipeline = device.createComputePipeline({
    layout: device.createPipelineLayout({ bindGroupLayouts: [traceBindGroupLayout] }),
    compute: { module: shaderModule, entryPoint: 'trace' }
  });
  const expandBindGroup = device.createBindGroup({
    layout: expandBindGroupLayout,
    entries: [
      { binding: 0, resource: { buffer: uniformBuffer } },
      { binding: 1, resource: { buffer: costGridBuffer } },
      { binding: 2, resource: { buffer: distBuffer } },
      { binding: 3, resource: { buffer: parentBuffer } },
      { binding: 4, resource: { buffer: frontierFlagBuffer } }
    ]
  });
  const traceBindGroup = device.createBindGroup({
    layout: traceBindGroupLayout,
    entries: [
      { binding: 0, resource: { buffer: uniformBuffer } },
      { binding: 1, resource: { buffer: costGridBuffer } },
      { binding: 2, resource: { buffer: distBuffer } },
      { binding: 3, resource: { buffer: parentBuffer } },
      { binding: 4, resource: { buffer: frontierFlagBuffer } },
      { binding: 5, resource: { buffer: pathBuffer } },
      { binding: 6, resource: { buffer: pathLengthBuffer } }
    ]
  });

  // Initialize cost grid to 1.0 (uniform traversal cost).
  device.queue.writeBuffer(costGridBuffer, 0, new Float32Array(cellCount).fill(1.0));

  const uniformData = new ArrayBuffer(UNIFORM_SIZE);
  const uniformU32 = new Uint32Array(uniformData);
  const dispatchX = Math.ceil(width / EXPAND_WORKGROUP);
  const dispatchY = Math.ceil(height / EXPAND_WORKGROUP);

  // Concurrency guard — promise chain serializes any number of concurrent
  // callers so they never race on the shared readback buffers.
  let queue: Promise<unknown> = Promise.resolve();

  // Pre-allocated init/reset payloads to avoid GC churn per findPath call.
  const initDist = new Uint32Array(cellCount).fill(INF_U32);
  const initParent = new Int32Array(cellCount).fill(-1);
  const zeroWord = new Uint32Array(1); // always zero; shared reset payload
  // initDist is mutated in place each call: the previous start is restored
  // to INF and the new start set to 0, instead of refilling cellCount words.
  let prevStartIdx = -1;

  /** Upload terrain costs (one f32 per cell; negative = impassable). */
  function setCostGrid(costs: Float32Array): void {
    if (costs.length !== cellCount) {
      throw new RangeError(
        `setCostGrid: expected ${cellCount} values (${width}×${height}), got ${costs.length}`
      );
    }
    device.queue.writeBuffer(costGridBuffer, 0, costs);
  }

  /** Public entry point — chains onto the serialization queue. */
  async function findPath(opts: FindPathOptions): Promise<PathResult> {
    const result = queue.then(() => findPathInternal(opts));
    queue = result.catch(() => {}); // keep the chain alive after failures
    return result;
  }

  async function findPathInternal(opts: FindPathOptions): Promise<PathResult> {
    // Non-integer coordinates would silently corrupt cell indexing:
    // `initDist[1.5] = 0` sets a plain object property, not an element, so
    // the start seed would never reach the GPU. Reject them up front.
    if (
      !Number.isInteger(opts.startX) ||
      !Number.isInteger(opts.startY) ||
      !Number.isInteger(opts.endX) ||
      !Number.isInteger(opts.endY)
    ) {
      throw new RangeError('findPath: coordinates must be integers');
    }
    if (
      opts.startX < 0 ||
      opts.startX >= width ||
      opts.startY < 0 ||
      opts.startY >= height ||
      opts.endX < 0 ||
      opts.endX >= width ||
      opts.endY < 0 ||
      opts.endY >= height
    ) {
      throw new RangeError(`findPath: coordinates out of bounds for ${width}×${height} grid`);
    }
    // NOTE(review): width + height iterations suffice when costs are near
    // uniform; heavily weighted grids whose shortest paths snake may need a
    // larger explicit maxIterations.
    const maxIterations = opts.maxIterations ?? width + height;

    // Write uniforms (layout must match the WGSL Uniforms struct).
    uniformU32[0] = width;
    uniformU32[1] = height;
    uniformU32[2] = opts.startX;
    uniformU32[3] = opts.startY;
    uniformU32[4] = opts.endX;
    uniformU32[5] = opts.endY;
    uniformU32[6] = 0;
    uniformU32[7] = 0;
    device.queue.writeBuffer(uniformBuffer, 0, uniformData);

    // Reset previous start to INF, seed the new start with distance 0.
    if (prevStartIdx >= 0) {
      initDist[prevStartIdx] = INF_U32;
    }
    const startIdx = opts.startY * width + opts.startX;
    initDist[startIdx] = 0;
    prevStartIdx = startIdx;
    device.queue.writeBuffer(distBuffer, 0, initDist);
    device.queue.writeBuffer(parentBuffer, 0, initParent);
    // Reset path length — trace leaves it untouched when no path exists.
    device.queue.writeBuffer(pathLengthBuffer, 0, zeroWord);

    // Iteratively expand the frontier in batches to reduce CPU-GPU round-trips.
    let iter = 0;
    while (iter < maxIterations) {
      const batchEnd = Math.min(iter + BATCH_SIZE, maxIterations);
      // Reset frontier flag before the batch, then run BATCH_SIZE expands.
      // If any cell was updated in the entire batch, continue expanding.
      device.queue.writeBuffer(frontierFlagBuffer, 0, zeroWord);
      const batchEncoder = device.createCommandEncoder();
      for (let b = iter; b < batchEnd; b++) {
        const pass = batchEncoder.beginComputePass();
        pass.setPipeline(expandPipeline);
        pass.setBindGroup(0, expandBindGroup);
        pass.dispatchWorkgroups(dispatchX, dispatchY);
        pass.end();
      }
      // Read back frontier flag after the batch.
      batchEncoder.copyBufferToBuffer(frontierFlagBuffer, 0, flagReadback, 0, 4);
      device.queue.submit([batchEncoder.finish()]);
      await flagReadback.mapAsync(GPUMapMode.READ);
      const flag = new Uint32Array(flagReadback.getMappedRange().slice(0))[0];
      flagReadback.unmap();
      iter = batchEnd;
      if (flag === 0) break; // No cells updated in entire batch — converged
    }

    // Reconstruct parent pointers deterministically from converged distances.
    const reconEncoder = device.createCommandEncoder();
    const reconPass = reconEncoder.beginComputePass();
    reconPass.setPipeline(reconstructPipeline);
    reconPass.setBindGroup(0, expandBindGroup);
    reconPass.dispatchWorkgroups(dispatchX, dispatchY);
    reconPass.end();
    device.queue.submit([reconEncoder.finish()]);

    // Trace the path (single thread walks the parent chain backwards).
    const traceEncoder = device.createCommandEncoder();
    const tracePass = traceEncoder.beginComputePass();
    tracePass.setPipeline(tracePipeline);
    tracePass.setBindGroup(0, traceBindGroup);
    tracePass.dispatchWorkgroups(1);
    tracePass.end();
    traceEncoder.copyBufferToBuffer(pathLengthBuffer, 0, pathLengthReadback, 0, 4);
    device.queue.submit([traceEncoder.finish()]);
    await pathLengthReadback.mapAsync(GPUMapMode.READ);
    const pathLength = new Uint32Array(pathLengthReadback.getMappedRange().slice(0))[0];
    pathLengthReadback.unmap();
    if (pathLength === 0) {
      return { path: [], distance: Infinity, found: false };
    }

    // Read back path cells and the distance field.
    const readEncoder = device.createCommandEncoder();
    readEncoder.copyBufferToBuffer(pathBuffer, 0, pathReadback, 0, pathLength * 4);
    readEncoder.copyBufferToBuffer(distBuffer, 0, distReadback, 0, cellCount * 4);
    device.queue.submit([readEncoder.finish()]);
    await pathReadback.mapAsync(GPUMapMode.READ);
    const pathData = new Uint32Array(pathReadback.getMappedRange().slice(0, pathLength * 4));
    pathReadback.unmap();
    const path = Array.from(pathData).reverse(); // end→start becomes start→end
    await distReadback.mapAsync(GPUMapMode.READ);
    const distData = new Uint32Array(distReadback.getMappedRange().slice(0));
    const endIdx = opts.endY * width + opts.endX;
    const distance = distData[endIdx] / SCALE;
    distReadback.unmap();
    // A complete path begins at the start cell after reversal. A broken
    // parent chain (distance field not converged within maxIterations) would
    // otherwise be reported as a found path.
    if (path[0] !== startIdx) {
      return { path: [], distance: Infinity, found: false };
    }
    return { path, distance, found: true };
  }

  /**
   * Read back the full distance field from the most recent findPath call.
   * Unreachable cells are Infinity; values are in cost units (de-scaled).
   */
  async function getDistanceField(): Promise<Float32Array> {
    // Serialize with findPath to avoid buffer conflicts.
    const result = queue.then(async () => {
      const encoder = device.createCommandEncoder();
      encoder.copyBufferToBuffer(distBuffer, 0, distFieldReadback, 0, cellCount * 4);
      device.queue.submit([encoder.finish()]);
      await distFieldReadback.mapAsync(GPUMapMode.READ);
      const raw = new Uint32Array(distFieldReadback.getMappedRange().slice(0));
      distFieldReadback.unmap();
      const field = new Float32Array(cellCount);
      for (let i = 0; i < cellCount; i++) {
        field[i] = raw[i] === INF_U32 ? Infinity : raw[i] / SCALE;
      }
      return field;
    });
    queue = result.catch(() => {});
    return result;
  }

  /** Release all GPU buffers. The instance must not be used afterwards. */
  function destroy(): void {
    costGridBuffer.destroy();
    distBuffer.destroy();
    parentBuffer.destroy();
    frontierFlagBuffer.destroy();
    pathBuffer.destroy();
    pathLengthBuffer.destroy();
    uniformBuffer.destroy();
    flagReadback.destroy();
    pathLengthReadback.destroy();
    pathReadback.destroy();
    distFieldReadback.destroy();
    distReadback.destroy();
  }

  return { setCostGrid, findPath, getDistanceField, destroy };
}
Documentation
Pathfinding
Shortest path finding on 2D weighted grids. Uses wavefront expansion (parallel Dijkstra) — each iteration expands all frontier cells in a single dispatch.
API
createPathfinder(device, options)
Returns a Pathfinder instance.
| Option | Type | Default | Description |
|---|---|---|---|
width |
number |
(required) | Grid width in cells |
height |
number |
(required) | Grid height in cells |
pathfinder.setCostGrid(costs)
Upload terrain costs. costs is a Float32Array of width * height values.
- 1.0 = normal traversal cost
- > 1.0 = slower terrain (e.g., 3.0 for swamp)
- < 0 = impassable (wall)
The cost grid persists between findPath calls until you call setCostGrid again.
pathfinder.findPath(options)
Returns a Promise<PathResult>.
| Option | Type | Default | Description |
|---|---|---|---|
startX |
number |
(required) | Start cell X coordinate |
startY |
number |
(required) | Start cell Y coordinate |
endX |
number |
(required) | End cell X coordinate |
endY |
number |
(required) | End cell Y coordinate |
maxIterations |
number |
width + height |
Max wavefront iterations |
PathResult:
| Field | Type | Description |
|---|---|---|
path |
number[] |
Cell indices from start to end |
distance |
number |
Total path cost |
found |
boolean |
Whether a path exists |
Cell indices can be converted to coordinates: x = index % width, y = Math.floor(index / width).
pathfinder.getDistanceField()
Returns the full distance field as a Float32Array. Each cell contains the shortest distance from the start position. Unreachable cells have Infinity. The distance field is the intermediate result of wavefront expansion, exposed for direct use.
pathfinder.destroy()
Releases all GPU buffers.
Further Reading
Further Reading
Resources on pathfinding algorithms, GPU parallelization, and game AI navigation.
Core Algorithms
Dijkstra, "A Note on Two Problems in Connexion with Graphs" (1959) The original shortest path algorithm. The wavefront expansion in this module is a parallel variant of Dijkstra's algorithm, using the same greedy relaxation principle. https://dl.acm.org/doi/10.1007/BF01386390
Hart, Nilsson, Raphael, "A Formal Basis for the Heuristic Determination of Minimum Cost Paths" (1968) The A* algorithm paper. Introduces the admissible heuristic concept (
f = g + h) that guides search toward the goal. The README describes how to extend this module with an A* heuristic. https://ieeexplore.ieee.org/document/4082128
Harabor, Grastien, "Online Graph Pruning for Pathfinding on Grid Maps" (2011) Jump Point Search (JPS) — a faster variant of A* on uniform-cost grids that skips intermediate nodes. Can be adapted for GPU execution. https://ojs.aaai.org/index.php/AAAI/article/view/7994
GPU Pathfinding
Bleiweiss, "GPU Accelerated Pathfinding" (GDC 2008) One of the earliest practical presentations on GPU pathfinding for games. Covers wavefront expansion, parallel BFS, and integration with game engines. https://diglib.eg.org/handle/10.2312/EGGH.EGGH08.065-074
Fischer, Dachsbacher, "GPU-Accelerated A* Pathfinding" (2009) Proposes a parallel A* implementation using work queues on the GPU. Demonstrates speedups over CPU A* on large grids.
Delaunay, Meneveaux, "Parallel Dijkstra's Algorithm on GPU" (2019) Analyses wavefront-based parallel Dijkstra on modern GPU architectures with atomic operations, similar to the approach used in this module.
Flow Fields and Navigation
Emerson, "Crowd Pathfinding and Steering Using Flow Field Tiles" (GDC 2015) How Planetary Annihilation uses flow fields (derived from distance fields like ours) for thousands of units navigating simultaneously.
Sturtevant, "Benchmarks for Grid-Based Pathfinding" (2012) Standard benchmark maps and methodology for evaluating pathfinding algorithms. Useful for testing and comparing performance. https://movingai.com/benchmarks/
Game Development Context
Amit Patel, "Introduction to A*" (Red Blob Games) The best interactive visual introduction to A*, Dijkstra, BFS, and graph search in general. Essential reading for understanding pathfinding intuitively. https://www.redblobgames.com/pathfinding/a-star/introduction.html
Amit Patel, "Implementation of A*" (Red Blob Games) Practical implementation guide covering priority queues, heuristics, grid representations, and common pitfalls. Companion to the theory article above. https://www.redblobgames.com/pathfinding/a-star/implementation.html
Millington, Funge, "Artificial Intelligence for Games" (2009) Comprehensive textbook covering pathfinding, steering, decision making, and other game AI topics. Chapters 4-5 cover pathfinding in depth.
WebGPU Atomics
- WebGPU Specification — Atomic Operations
The
atomicMin operation used in this module's wavefront expansion. Understanding atomic semantics is key to reasoning about the algorithm's correctness. https://www.w3.org/TR/WGSL/#atomic-builtin-functions