Boids

GPU flocking simulation

Quick Start

import { createBoids } from './webgpu-market/boids/boids';

const boids = createBoids(device, {
  count: 2048,
  bounds: { width: 100, height: 100 },
  separationWeight: 1.5,
  alignmentWeight: 1.0,
  cohesionWeight: 1.0,
});

// Each frame
boids.update(deltaTime);

// Bind boids.positions (GPUBuffer of vec2f) in your render pipeline
boids.destroy();

Source

// GPU Boids — Reynolds' flocking simulation
// Each thread processes one boid: reads all other boids for neighbor search (O(n^2)),
// applies separation, alignment, and cohesion rules, then updates position and velocity.
// Uses ping-pong buffers — reads from "in" buffers, writes to "out" buffers.

struct Uniforms {
  delta_time: f32,
  count: u32,
  bounds_width: f32,
  bounds_height: f32,
  separation_weight: f32,
  alignment_weight: f32,
  cohesion_weight: f32,
  separation_radius: f32,
  neighbor_radius: f32,
  max_speed: f32,
  max_force: f32,
  _pad: u32,
}

@group(0) @binding(0) var<uniform> u: Uniforms;
@group(0) @binding(1) var<storage, read> positions_in: array<vec2f>;
@group(0) @binding(2) var<storage, read> velocities_in: array<vec2f>;
@group(0) @binding(3) var<storage, read_write> positions_out: array<vec2f>;
@group(0) @binding(4) var<storage, read_write> velocities_out: array<vec2f>;

// Limit vector magnitude without branching on zero-length vectors
fn limit_vec(v: vec2f, max_len: f32) -> vec2f {
  let len_sq = dot(v, v);
  if (len_sq > max_len * max_len) {
    return normalize(v) * max_len;
  }
  return v;
}

@compute @workgroup_size(64)
fn main(@builtin(global_invocation_id) gid: vec3u) {
  let idx = gid.x;
  if (idx >= u.count) {
    return;
  }

  let pos = positions_in[idx];
  let vel = velocities_in[idx];

  // Accumulators for the three flocking rules
  var separation = vec2f(0.0);
  var alignment = vec2f(0.0);
  var cohesion = vec2f(0.0);
  var sep_count = 0u;
  var neighbor_count = 0u;

  // Brute-force neighbor search over all boids
  for (var i = 0u; i < u.count; i++) {
    if (i == idx) {
      continue;
    }

    let other_pos = positions_in[i];
    let diff = pos - other_pos;
    let dist_sq = dot(diff, diff);

    // Separation: steer away from boids that are too close
    let sep_r_sq = u.separation_radius * u.separation_radius;
    if (dist_sq < sep_r_sq && dist_sq > 0.0) {
      let dist = sqrt(dist_sq);
      // Weight inversely by distance — closer boids push harder
      separation += (diff / dist) / dist;
      sep_count++;
    }

    // Alignment and cohesion: consider boids within the neighbor radius
    let neighbor_r_sq = u.neighbor_radius * u.neighbor_radius;
    if (dist_sq < neighbor_r_sq) {
      alignment += velocities_in[i];
      cohesion += other_pos;
      neighbor_count++;
    }
  }

  var force = vec2f(0.0);

  // Separation: average repulsion vectors, scale to max speed, compute steering force
  if (sep_count > 0u) {
    separation /= f32(sep_count);
    if (dot(separation, separation) > 0.0) {
      separation = normalize(separation) * u.max_speed;
      separation -= vel;
      separation = limit_vec(separation, u.max_force);
    }
    force += separation * u.separation_weight;
  }

  // Alignment: steer toward average heading of neighbors
  if (neighbor_count > 0u) {
    alignment /= f32(neighbor_count);
    if (dot(alignment, alignment) > 0.0) {
      alignment = normalize(alignment) * u.max_speed;
      alignment -= vel;
      alignment = limit_vec(alignment, u.max_force);
    }
    force += alignment * u.alignment_weight;
  }

  // Cohesion: steer toward average position of neighbors
  if (neighbor_count > 0u) {
    cohesion /= f32(neighbor_count);
    let desired = cohesion - pos;
    if (dot(desired, desired) > 0.0) {
      let steer = normalize(desired) * u.max_speed - vel;
      force += limit_vec(steer, u.max_force) * u.cohesion_weight;
    }
  }

  // Integrate velocity and position
  var new_vel = vel + force * u.delta_time;
  new_vel = limit_vec(new_vel, u.max_speed);

  var new_pos = pos + new_vel * u.delta_time;

  // Wrap around bounds (toroidal boundary)
  let hw = u.bounds_width * 0.5;
  let hh = u.bounds_height * 0.5;

  if (new_pos.x < -hw) { new_pos.x += u.bounds_width; }
  if (new_pos.x > hw) { new_pos.x -= u.bounds_width; }
  if (new_pos.y < -hh) { new_pos.y += u.bounds_height; }
  if (new_pos.y > hh) { new_pos.y -= u.bounds_height; }

  positions_out[idx] = new_pos;
  velocities_out[idx] = new_vel;
}

// GPU Boids — Reynolds' flocking simulation
// Compute shader updates positions and velocities. Caller handles rendering.
// Uses ping-pong buffers for positions and velocities (read from one, write to other, swap).
//
// Default WGSL loading uses a ?raw import (works with Vite, esbuild, Webpack).
// Alternative: load via fetch — see README.md for details.
import shaderSource from './boids.wgsl?raw';

export interface BoidsOptions {
  count?: number;
  bounds?: { width: number; height: number };
  separationWeight?: number;
  alignmentWeight?: number;
  cohesionWeight?: number;
  separationRadius?: number;
  neighborRadius?: number;
  maxSpeed?: number;
  maxForce?: number;
}

export interface Boids {
  positions: GPUBuffer;
  velocities: GPUBuffer;
  count: number;
  update(deltaTime: number): void;
  destroy(): void;
}

// Uniform buffer layout (std140-aligned):
// f32 deltaTime, u32 count, f32 boundsWidth, f32 boundsHeight,
// f32 separationWeight, f32 alignmentWeight, f32 cohesionWeight, f32 separationRadius,
// f32 neighborRadius, f32 maxSpeed, f32 maxForce, u32 _pad
const UNIFORM_SIZE = 48; // 12 x 4 bytes
const WORKGROUP_SIZE = 64;

export function createBoids(device: GPUDevice, options: BoidsOptions = {}): Boids {
  const count = options.count ?? 2048;
  const boundsWidth = options.bounds?.width ?? 100;
  const boundsHeight = options.bounds?.height ?? 100;
  const separationWeight = options.separationWeight ?? 1.5;
  const alignmentWeight = options.alignmentWeight ?? 1.0;
  const cohesionWeight = options.cohesionWeight ?? 1.0;
  const separationRadius = options.separationRadius ?? 3.0;
  const neighborRadius = options.neighborRadius ?? 6.0;
  const maxSpeed = options.maxSpeed ?? 10.0;
  const maxForce = options.maxForce ?? 5.0;

  const bufferSize = count * 2 * 4; // count * vec2f * sizeof(f32)

  // Initialize positions randomly within bounds, velocities with small random values
  const initialPositions = new Float32Array(count * 2);
  const initialVelocities = new Float32Array(count * 2);
  for (let i = 0; i < count; i++) {
    initialPositions[i * 2] = (Math.random() - 0.5) * boundsWidth;
    initialPositions[i * 2 + 1] = (Math.random() - 0.5) * boundsHeight;
    const angle = Math.random() * Math.PI * 2;
    const speed = Math.random() * maxSpeed * 0.5;
    initialVelocities[i * 2] = Math.cos(angle) * speed;
    initialVelocities[i * 2 + 1] = Math.sin(angle) * speed;
  }

  // Ping-pong buffer pairs for positions and velocities.
  // VERTEX usage lets callers bind these directly for rendering.
  const bufferUsage = GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.VERTEX;

  const posA = device.createBuffer({
    size: bufferSize,
    usage: bufferUsage,
    mappedAtCreation: true
  });
  new Float32Array(posA.getMappedRange()).set(initialPositions);
  posA.unmap();

  const posB = device.createBuffer({ size: bufferSize, usage: bufferUsage });

  const velA = device.createBuffer({
    size: bufferSize,
    usage: bufferUsage,
    mappedAtCreation: true
  });
  new Float32Array(velA.getMappedRange()).set(initialVelocities);
  velA.unmap();

  const velB = device.createBuffer({ size: bufferSize, usage: bufferUsage });

  const uniformBuffer = device.createBuffer({
    size: UNIFORM_SIZE,
    usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST
  });

  const shaderModule = device.createShaderModule({ code: shaderSource });

  const bindGroupLayout = device.createBindGroupLayout({
    entries: [
      { binding: 0, visibility: GPUShaderStage.COMPUTE, buffer: { type: 'uniform' } },
      { binding: 1, visibility: GPUShaderStage.COMPUTE, buffer: { type: 'read-only-storage' } },
      { binding: 2, visibility: GPUShaderStage.COMPUTE, buffer: { type: 'read-only-storage' } },
      { binding: 3, visibility: GPUShaderStage.COMPUTE, buffer: { type: 'storage' } },
      { binding: 4, visibility: GPUShaderStage.COMPUTE, buffer: { type: 'storage' } }
    ]
  });

  const pipeline = device.createComputePipeline({
    layout: device.createPipelineLayout({ bindGroupLayouts: [bindGroupLayout] }),
    compute: { module: shaderModule, entryPoint: 'main' }
  });

  // Two bind groups for ping-pong: A->B and B->A
  const bindGroupAtoB = device.createBindGroup({
    layout: bindGroupLayout,
    entries: [
      { binding: 0, resource: { buffer: uniformBuffer } },
      { binding: 1, resource: { buffer: posA } },
      { binding: 2, resource: { buffer: velA } },
      { binding: 3, resource: { buffer: posB } },
      { binding: 4, resource: { buffer: velB } }
    ]
  });

  const bindGroupBtoA = device.createBindGroup({
    layout: bindGroupLayout,
    entries: [
      { binding: 0, resource: { buffer: uniformBuffer } },
      { binding: 1, resource: { buffer: posB } },
      { binding: 2, resource: { buffer: velB } },
      { binding: 3, resource: { buffer: posA } },
      { binding: 4, resource: { buffer: velA } }
    ]
  });

  const uniformData = new ArrayBuffer(UNIFORM_SIZE);
  const f32 = new Float32Array(uniformData);
  const u32 = new Uint32Array(uniformData);

  // Track which buffer is current (false = A is current, true = B is current)
  let useB = false;

  function update(deltaTime: number): void {
    f32[0] = deltaTime;
    u32[1] = count;
    f32[2] = boundsWidth;
    f32[3] = boundsHeight;
    f32[4] = separationWeight;
    f32[5] = alignmentWeight;
    f32[6] = cohesionWeight;
    f32[7] = separationRadius;
    f32[8] = neighborRadius;
    f32[9] = maxSpeed;
    f32[10] = maxForce;
    u32[11] = 0; // padding

    device.queue.writeBuffer(uniformBuffer, 0, uniformData);

    const encoder = device.createCommandEncoder();
    const pass = encoder.beginComputePass();
    pass.setPipeline(pipeline);
    pass.setBindGroup(0, useB ? bindGroupBtoA : bindGroupAtoB);
    pass.dispatchWorkgroups(Math.ceil(count / WORKGROUP_SIZE));
    pass.end();
    device.queue.submit([encoder.finish()]);

    useB = !useB;
  }

  function destroy(): void {
    posA.destroy();
    posB.destroy();
    velA.destroy();
    velB.destroy();
    uniformBuffer.destroy();
  }

  // Expose the current readable buffers via getters.
  // After update(), the "current" buffer is the one that was written to.
  return {
    get positions() {
      return useB ? posB : posA;
    },
    get velocities() {
      return useB ? velB : velA;
    },
    count,
    update,
    destroy
  };
}

Documentation

Boids

Flocking simulation using Reynolds' boids algorithm. A compute shader updates positions and velocities each frame. Rendering is separate.

API

`createBoids(device, options?)`

Returns a Boids instance. Boids are initialized with random positions within bounds and random velocity directions.

Option	Type	Default	Description
`count`	`number`	`2048`	Number of boids
`bounds`	`{ width: number, height: number }`	`100x100`	Simulation area (centered at origin)
`separationWeight`	`number`	`1.5`	Separation force multiplier
`alignmentWeight`	`number`	`1.0`	Alignment force multiplier
`cohesionWeight`	`number`	`1.0`	Cohesion force multiplier
`separationRadius`	`number`	`3.0`	Distance threshold for separation
`neighborRadius`	`number`	`6.0`	Distance threshold for alignment/cohesion
`maxSpeed`	`number`	`10.0`	Maximum boid speed
`maxForce`	`number`	`5.0`	Maximum steering force per frame

`flock.update(deltaTime)`

Dispatches the compute shader to advance the simulation by deltaTime seconds.

`flock.positions`

GPUBuffer containing count vec2<f32> values (current positions). Usages: STORAGE | COPY_DST | VERTEX.

`flock.velocities`

GPUBuffer containing count vec2<f32> values (current velocities). Usages: STORAGE | COPY_DST | VERTEX.

`flock.count`

Number of boids in the simulation.

`flock.destroy()`

Releases all GPU buffers (both ping-pong pairs and uniforms).

Original Research

Craig Reynolds, "Flocks, Herds, and Schools: A Distributed Behavioral Model" (SIGGRAPH 1987) The foundational paper introducing boids. Defines the three rules (separation, alignment, cohesion) and demonstrates emergent flocking behavior from simple local interactions. Reynolds won an Academy Award for Technical Achievement for this work. https://dl.acm.org/doi/10.1145/37402.37406
Craig Reynolds, "Steering Behaviors for Autonomous Characters" (GDC 1999) Extends the original boids model with practical steering behaviors for games: seek, flee, pursue, evade, wander, path following, obstacle avoidance, and leader following. https://www.red3d.com/cwr/steer/gdc99/
Craig Reynolds' Boids page The author's own reference page with links to papers, implementations, and related work. https://www.red3d.com/cwr/boids/

GPU Implementation

WebGPU Samples — Compute Boids The official WebGPU samples include a boids implementation. A useful reference for WebGPU-specific patterns like ping-pong buffers and compute dispatch. https://webgpu.github.io/webgpu-samples/?sample=computeBoids
Simon Green, "Particle Simulation using CUDA" (2010) NVIDIA technical report covering GPU particle systems with spatial hashing for neighbor search. The spatial hash approach described here can be adapted to optimize this module's brute-force neighbor search. https://developer.download.nvidia.com/assets/cuda/files/particles.pdf
Joselli et al., "A Flocking Boids Simulation and Optimization Structure for Mobile Multiprocessor Systems" (2009) Explores optimization strategies for GPU flocking simulations, including spatial partitioning and workload balancing.

Spatial Hashing

Matthias Teschner et al., "Optimized Spatial Hashing for Collision Detection of Deformable Objects" (2003) The spatial hashing technique that can replace brute-force neighbor search for large boid counts. Maps 3D positions to a 1D hash table using a simple modular hash. https://matthias-research.github.io/pages/publications/tetraederCollision.pdf
Green, "GPU Gems 3: Chapter 32 — Broad-Phase Collision Detection with CUDA" GPU-friendly spatial hashing with sort-based construction, directly applicable to optimizing boid neighbor search. https://developer.nvidia.com/gpugems/gpugems3/part-v-physics-simulation/chapter-32-broad-phase-collision-detection-cuda

Emergent Behavior

Iain Couzin et al., "Collective Memory and Spatial Sorting in Animal Groups" (2002) Studies how simple local rules produce complex group-level phenomena in animal collectives. Provides biological context for why the boids model works. https://doi.org/10.1006/jtbi.2002.3065
Vicsek et al., "Novel Type of Phase Transition in a System of Self-Driven Particles" (1995) Physics perspective on flocking as a phase transition. The Vicsek model is a simplified variant of boids that's easier to analyze mathematically. https://doi.org/10.1103/PhysRevLett.75.1226

General References

Daniel Shiffman, "The Nature of Code" — Autonomous Agents Accessible introduction to steering behaviors and flocking, with step-by-step code examples. Good for understanding the math behind the Reynolds model. https://natureofcode.com/autonomous-agents/
Sebastian Lague, "Coding Adventure: Boids" (YouTube) Visual walkthrough of implementing boids with GPU compute shaders, including spatial partitioning optimization. https://www.youtube.com/watch?v=bqtqltqcQhw

Boids

Quick Start

Boids

API

createBoids(device, options?)

flock.update(deltaTime)

flock.positions

flock.velocities

flock.count

flock.destroy()

Further Reading

Original Research

GPU Implementation

Spatial Hashing

Emergent Behavior

General References

`createBoids(device, options?)`

`flock.update(deltaTime)`

`flock.positions`

`flock.velocities`

`flock.count`

`flock.destroy()`