Bloom
Multi-level bloom post-processing
Quick Start
import { createBloom } from './webgpu-market/bloom/bloom';
const bloom = createBloom(device, { format: 'rgba8unorm' });
// Each frame — apply bloom from input texture to output texture
bloom.render(inputTexture, outputTexture, {
threshold: 0.8,
strength: 1.0,
radius: 0.5,
});
bloom.destroy();
Source
// Multi-level bloom — threshold, downsample (Jimenez 13-tap), Gaussian blur, tent upsample, composite
// Based on the Call of Duty: Advanced Warfare bloom presentation (Jimenez 2014)
// Per-pass parameters. The host writes one 32-byte slot per render pass
// (eight f32 values in exactly this field order) and selects the slot with a
// dynamic uniform-buffer offset, so the field order here must not change.
struct BloomUniforms {
// Luminance level around which the bright-pass transition is centred.
threshold: f32,
// Fraction of `threshold` used as the half-width of the soft transition.
soft_knee: f32,
// Multiplier applied to the bloom term in the final composite.
strength: f32,
// Scale applied to the tap spacing of the Gaussian blur.
radius: f32,
// (1 / width, 1 / height) of the texture this pass samples from.
texel_size: vec2<f32>,
// Blur axis: (1, 0) for the horizontal pass, (0, 1) for the vertical pass,
// (0, 0) for passes that do not blur.
direction: vec2<f32>,
}
// Group 0: the uniform slot for the current pass.
@group(0) @binding(0) var<uniform> u: BloomUniforms;
// Group 1: the texture being filtered and the sampler shared by every lookup.
@group(1) @binding(0) var src_texture: texture_2d<f32>;
@group(1) @binding(1) var src_sampler: sampler;
// Second input, read only by fs_upsample and fs_composite.
@group(1) @binding(2) var blend_texture: texture_2d<f32>;
// Vertex-to-fragment interface shared by every pass.
struct VertexOutput {
// Clip-space position of the quad vertex.
@builtin(position) position: vec4<f32>,
// Texture coordinate in [0, 1], interpolated across the quad.
@location(0) uv: vec2<f32>,
}
// Emits a full-screen quad as two triangles (six vertices, no vertex buffer).
// The clip-space corner is looked up by vertex index; the UV is derived from
// it so that clip (-1, -1) maps to UV (0, 1) and clip (1, 1) maps to UV (1, 0).
@vertex
fn vs_quad(@builtin(vertex_index) vi: u32) -> VertexOutput {
  var corners = array<vec2<f32>, 6>(
    vec2<f32>(-1.0, -1.0), vec2<f32>(1.0, -1.0), vec2<f32>(-1.0, 1.0),
    vec2<f32>(-1.0, 1.0), vec2<f32>(1.0, -1.0), vec2<f32>(1.0, 1.0)
  );
  let ndc = corners[vi];

  var result: VertexOutput;
  result.position = vec4<f32>(ndc, 0.0, 1.0);
  // Remap [-1, 1] to [0, 1]; V is flipped relative to clip-space Y.
  result.uv = vec2<f32>(ndc.x * 0.5 + 0.5, 0.5 - ndc.y * 0.5);
  return result;
}
// Weighted sum of the RGB channels, used as the brightness measure for the
// bright-pass threshold.
fn luminance(c: vec3<f32>) -> f32 {
  let channel_weights = vec3<f32>(0.2126, 0.7152, 0.0722);
  return dot(c, channel_weights);
}
// 1. Extract bright regions with soft-knee threshold
// Scales each pixel's colour by a smooth 0..1 factor of its luminance. The
// transition is centred on `threshold` and spans `threshold * soft_knee` on
// either side. Output alpha is always 1.
@fragment
fn fs_threshold(in: VertexOutput) -> @location(0) vec4<f32> {
  let color = textureSample(src_texture, src_sampler, in.uv);
  let lum = luminance(color.rgb);
  // smoothstep divides by (high - low). A soft_knee or threshold of 0 would
  // make both edges equal, so keep a small minimum half-width; the result is
  // then effectively a hard cut at `threshold`.
  let knee = max(u.threshold * u.soft_knee, 0.0001);
  let contribution = smoothstep(u.threshold - knee, u.threshold + knee, lum);
  return vec4<f32>(color.rgb * contribution, 1.0);
}
// 2. 13-tap Jimenez downsample (CoD:AW technique) — high quality, anti-flicker
// Samples a 3x3 grid at +/-1 texel plus four diagonal taps at +/-0.5 texel
// (offsets are in units of u.texel_size) and blends them as five overlapping
// 2x2 boxes: the inner box carries weight 0.5 and each of the four outer boxes
// carries 0.125. Expanded per tap that is:
//   centre 4/32, inner diagonals 4/32 each, edge midpoints 2/32 each,
//   corners 1/32 each  ->  total 1.0, so the pass preserves overall energy.
@fragment
fn fs_downsample(in: VertexOutput) -> @location(0) vec4<f32> {
  let t = u.texel_size;
  let uv = in.uv;

  // Centre tap: shared by all four outer boxes.
  let a = textureSample(src_texture, src_sampler, uv);

  // Inner box: half-texel diagonals.
  let b = textureSample(src_texture, src_sampler, uv + vec2<f32>(-0.5, -0.5) * t);
  let c = textureSample(src_texture, src_sampler, uv + vec2<f32>( 0.5, -0.5) * t);
  let d = textureSample(src_texture, src_sampler, uv + vec2<f32>(-0.5,  0.5) * t);
  let e = textureSample(src_texture, src_sampler, uv + vec2<f32>( 0.5,  0.5) * t);

  // Outer 3x3 ring, row by row.
  let f = textureSample(src_texture, src_sampler, uv + vec2<f32>(-1.0, -1.0) * t);
  let g = textureSample(src_texture, src_sampler, uv + vec2<f32>( 0.0, -1.0) * t);
  let h = textureSample(src_texture, src_sampler, uv + vec2<f32>( 1.0, -1.0) * t);
  let i = textureSample(src_texture, src_sampler, uv + vec2<f32>(-1.0,  0.0) * t);
  let j = textureSample(src_texture, src_sampler, uv + vec2<f32>( 1.0,  0.0) * t);
  let k = textureSample(src_texture, src_sampler, uv + vec2<f32>(-1.0,  1.0) * t);
  let l = textureSample(src_texture, src_sampler, uv + vec2<f32>( 0.0,  1.0) * t);
  let m = textureSample(src_texture, src_sampler, uv + vec2<f32>( 1.0,  1.0) * t);

  let inner = b + c + d + e;   // one box each
  let edges = g + i + j + l;   // shared by two outer boxes each
  let corners = f + h + k + m; // one outer box each

  return a * (4.0 / 32.0)
       + inner * (4.0 / 32.0)
       + edges * (2.0 / 32.0)
       + corners * (1.0 / 32.0);
}
// 3. 9-tap separable Gaussian blur
// One axis of the blur: a centre tap plus four mirrored pairs spaced along
// u.direction. The spacing is one texel scaled by u.radius. The weights sum
// to 1.
@fragment
fn fs_blur(in: VertexOutput) -> @location(0) vec4<f32> {
  // weights[0] is the centre tap; weights[n] is shared by the taps at +/-n steps.
  var weights = array<f32, 5>(
    0.2270270270, 0.1945945946, 0.1216216216, 0.0540540541, 0.0162162162
  );
  let step_uv = u.direction * u.texel_size * u.radius;

  var sum = textureSample(src_texture, src_sampler, in.uv) * weights[0];
  for (var tap = 1; tap < 5; tap++) {
    let offset = step_uv * f32(tap);
    sum += textureSample(src_texture, src_sampler, in.uv + offset) * weights[tap];
    sum += textureSample(src_texture, src_sampler, in.uv - offset) * weights[tap];
  }
  return sum;
}
// 4. 9-tap tent upsample + additive combine with current level
// Filters src_texture with a 3x3 tent kernel (1-2-1 / 2-4-2 / 1-2-1, divided
// by 16) at one-texel spacing, then adds the blend_texture sample at the same
// UV.
@fragment
fn fs_upsample(in: VertexOutput) -> @location(0) vec4<f32> {
  let texel = u.texel_size;

  var bloom = vec4<f32>(0.0);
  for (var row = -1; row <= 1; row++) {
    for (var col = -1; col <= 1; col++) {
      // Separable tent: 2 on the axis, 1 off it, so 4 / 2 / 1 over the grid.
      let weight = f32((2 - abs(col)) * (2 - abs(row))) / 16.0;
      let offset = vec2<f32>(f32(col), f32(row)) * texel;
      bloom += textureSample(src_texture, src_sampler, in.uv + offset) * weight;
    }
  }

  let current = textureSample(blend_texture, src_sampler, in.uv);
  return current + bloom;
}
// 5. Final composite: original + bloom × strength
// src_texture is the original image and blend_texture is the bloom result.
// The output keeps the original's alpha.
@fragment
fn fs_composite(in: VertexOutput) -> @location(0) vec4<f32> {
  let base = textureSample(src_texture, src_sampler, in.uv);
  let glow = textureSample(blend_texture, src_sampler, in.uv);
  let lit = base.rgb + glow.rgb * u.strength;
  return vec4<f32>(lit, base.a);
}
import shaderCode from './bloom.wgsl?raw';
/** Per-frame tuning values for `render`. Every field is optional. */
export interface BloomParams {
/** Brightness threshold for bloom extraction. Defaults to 0.8. */
threshold?: number;
/** Bloom intensity multiplier applied in the final composite. Defaults to 1.0. */
strength?: number;
/** Blur radius scale. Defaults to 0.5. */
radius?: number;
/** Soft threshold transition width, as a fraction of `threshold`. Defaults to 0.5. */
softKnee?: number;
}
/** Handle returned by `createBloom`. */
export interface BloomRenderer {
/**
 * Encodes and submits the bloom passes that read `input` and write the
 * composited result to `output`. Intermediate textures are (re)allocated
 * whenever the size of `input` changes.
 */
render(input: GPUTexture, output: GPUTexture, params?: BloomParams): void;
/** Destroys the intermediate textures and the uniform buffer. */
destroy(): void;
}
// Byte size of one BloomUniforms slot as declared in the shader:
// threshold, soft_knee, strength, radius, texel_size.xy, direction.xy.
const UNIFORM_SIZE = 32; // 8 floats × 4 bytes
/**
 * Creates a multi-level bloom post-processor bound to `device`.
 *
 * Each `render` call encodes and submits, in order: one threshold pass
 * (input → mip 0), `levels - 1` downsample passes, a horizontal + vertical blur
 * pair per level, `levels - 1` upsample passes that fold the pyramid back up
 * from the smallest level, and one composite pass into `output`.
 *
 * @param device - Device used to create every GPU resource and to submit work.
 * @param options - `format` is the format of the intermediate textures and of
 *   every render target, including `output` (default `'rgba8unorm'`).
 *   `levels` is the number of pyramid levels (default 5).
 * @returns An object exposing `render` and `destroy`.
 * @throws RangeError if `options.levels` is not an integer >= 1.
 */
export function createBloom(
  device: GPUDevice,
  options?: { format?: GPUTextureFormat; levels?: number }
): BloomRenderer {
  const format = options?.format ?? 'rgba8unorm';
  const levels = options?.levels ?? 5;
  // The pass count, the uniform buffer size and every loop below assume a
  // whole, positive number of levels; fail here instead of deep inside render().
  if (!Number.isInteger(levels) || levels < 1) {
    throw new RangeError(`createBloom: levels must be an integer >= 1 (got ${levels})`);
  }

  // Every pass reads its own uniform slot through a dynamic offset, and dynamic
  // offsets must be multiples of minUniformBufferOffsetAlignment, so the slot
  // stride is UNIFORM_SIZE rounded up to that alignment.
  const alignment = device.limits.minUniformBufferOffsetAlignment;
  const slotSize = Math.ceil(UNIFORM_SIZE / alignment) * alignment;

  const shaderModule = device.createShaderModule({ label: 'bloom', code: shaderCode });
  const linearSampler = device.createSampler({
    magFilter: 'linear',
    minFilter: 'linear',
    addressModeU: 'clamp-to-edge',
    addressModeV: 'clamp-to-edge'
  });

  // Group 0: one dynamically-offset uniform slot.
  const uniformBGL = device.createBindGroupLayout({
    entries: [
      {
        binding: 0,
        visibility: GPUShaderStage.FRAGMENT,
        buffer: { type: 'uniform', hasDynamicOffset: true, minBindingSize: UNIFORM_SIZE }
      }
    ]
  });
  // Group 1 for passes with one input texture (threshold, downsample, blur).
  const singleTexBGL = device.createBindGroupLayout({
    entries: [
      { binding: 0, visibility: GPUShaderStage.FRAGMENT, texture: { sampleType: 'float' } },
      { binding: 1, visibility: GPUShaderStage.FRAGMENT, sampler: {} }
    ]
  });
  // Group 1 for passes with two input textures (upsample, composite).
  const dualTexBGL = device.createBindGroupLayout({
    entries: [
      { binding: 0, visibility: GPUShaderStage.FRAGMENT, texture: { sampleType: 'float' } },
      { binding: 1, visibility: GPUShaderStage.FRAGMENT, sampler: {} },
      { binding: 2, visibility: GPUShaderStage.FRAGMENT, texture: { sampleType: 'float' } }
    ]
  });
  const singleTexLayout = device.createPipelineLayout({
    bindGroupLayouts: [uniformBGL, singleTexBGL]
  });
  const dualTexLayout = device.createPipelineLayout({
    bindGroupLayouts: [uniformBGL, dualTexBGL]
  });

  /** Builds a full-screen-quad pipeline around one fragment entry point. */
  function makePipeline(
    label: string,
    entry: string,
    layout: GPUPipelineLayout
  ): GPURenderPipeline {
    return device.createRenderPipeline({
      label,
      layout,
      vertex: { module: shaderModule, entryPoint: 'vs_quad' },
      fragment: { module: shaderModule, entryPoint: entry, targets: [{ format }] },
      primitive: { topology: 'triangle-list' }
    });
  }

  const thresholdPipeline = makePipeline('bloom threshold', 'fs_threshold', singleTexLayout);
  const downsamplePipeline = makePipeline('bloom downsample', 'fs_downsample', singleTexLayout);
  const blurPipeline = makePipeline('bloom blur', 'fs_blur', singleTexLayout);
  const upsamplePipeline = makePipeline('bloom upsample', 'fs_upsample', dualTexLayout);
  const compositePipeline = makePipeline('bloom composite', 'fs_composite', dualTexLayout);

  // threshold + downsamples + (H, V) blur per level + upsamples + composite
  const totalPasses = 1 + (levels - 1) + levels * 2 + (levels - 1) + 1;
  const uniformBuffer = device.createBuffer({
    label: 'bloom uniforms',
    size: totalPasses * slotSize,
    usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST
  });
  const uniformBG = device.createBindGroup({
    layout: uniformBGL,
    entries: [{ binding: 0, resource: { buffer: uniformBuffer, size: UNIFORM_SIZE } }]
  });
  // CPU-side staging for all slots, reused every frame. Only the first eight
  // floats of each slot are ever written; the padding stays zero.
  const uniformData = new ArrayBuffer(totalPasses * slotSize);
  const uniformFloats = new Float32Array(uniformData);

  // Size-dependent resources, rebuilt by allocate().
  let mips: GPUTexture[] = [];
  let blurTemps: GPUTexture[] = [];
  let allocW = 0;
  let allocH = 0;
  let mipViews: GPUTextureView[] = [];
  let blurTempViews: GPUTextureView[] = [];
  let mipSingleBGs: GPUBindGroup[] = [];
  let blurTempSingleBGs: GPUBindGroup[] = [];
  let upsampleDualBGs: GPUBindGroup[] = [];

  // Bind groups that reference the caller's input texture, cached per texture.
  let cachedInputTex: GPUTexture | null = null;
  let inputSingleBG: GPUBindGroup | null = null;
  let compositeDualBG: GPUBindGroup | null = null;

  function makeSingleBG(view: GPUTextureView): GPUBindGroup {
    return device.createBindGroup({
      layout: singleTexBGL,
      entries: [
        { binding: 0, resource: view },
        { binding: 1, resource: linearSampler }
      ]
    });
  }

  function makeDualBG(view: GPUTextureView, blendView: GPUTextureView): GPUBindGroup {
    return device.createBindGroup({
      layout: dualTexBGL,
      entries: [
        { binding: 0, resource: view },
        { binding: 1, resource: linearSampler },
        { binding: 2, resource: blendView }
      ]
    });
  }

  /**
   * (Re)creates the pyramid for a `w` × `h` input: level 0 is half the input
   * size and each further level halves again (never below 1 px). Every level
   * has a `mips` texture and a same-sized `blurTemps` scratch texture.
   */
  function allocate(w: number, h: number): void {
    for (const t of mips) t.destroy();
    for (const t of blurTemps) t.destroy();
    mips = [];
    blurTemps = [];
    // The composite bind group references blurTemps/mips views, so the
    // input-keyed cache is invalid as well.
    cachedInputTex = null;
    inputSingleBG = null;
    compositeDualBG = null;
    allocW = w;
    allocH = h;

    const usage = GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.TEXTURE_BINDING;
    let mw = Math.max(1, w >> 1);
    let mh = Math.max(1, h >> 1);
    for (let i = 0; i < levels; i++) {
      mips.push(device.createTexture({ size: [mw, mh], format, usage }));
      blurTemps.push(device.createTexture({ size: [mw, mh], format, usage }));
      mw = Math.max(1, mw >> 1);
      mh = Math.max(1, mh >> 1);
    }

    mipViews = mips.map((t) => t.createView());
    blurTempViews = blurTemps.map((t) => t.createView());
    mipSingleBGs = mipViews.map((v) => makeSingleBG(v));
    blurTempSingleBGs = blurTempViews.map((v) => makeSingleBG(v));

    // Upsample chain, smallest level first. The pass for level i writes
    // blurTemps[i] = blurred mips[i] + tent-upsampled (result of level i + 1).
    // The first pass has no previous result and starts from the blurred
    // smallest mip; every later pass must read the previous pass's output in
    // blurTemps[i + 1], otherwise the deeper levels never reach the composite.
    upsampleDualBGs = [];
    for (let i = levels - 2; i >= 0; i--) {
      const source = i === levels - 2 ? mipViews[i + 1] : blurTempViews[i + 1];
      upsampleDualBGs.push(makeDualBG(source, mipViews[i]));
    }
  }

  function render(input: GPUTexture, output: GPUTexture, params?: BloomParams): void {
    const threshold = params?.threshold ?? 0.8;
    const strength = params?.strength ?? 1.0;
    const radius = params?.radius ?? 0.5;
    const softKnee = params?.softKnee ?? 0.5;

    if (input.width !== allocW || input.height !== allocH) {
      allocate(input.width, input.height);
    }

    // Fill one uniform slot per pass, in the same order the passes are encoded.
    let slot = 0;
    const writeSlot = (tx: number, ty: number, dx: number, dy: number): void => {
      const base = (slot * slotSize) / 4;
      uniformFloats[base] = threshold;
      uniformFloats[base + 1] = softKnee;
      uniformFloats[base + 2] = strength;
      uniformFloats[base + 3] = radius;
      uniformFloats[base + 4] = tx;
      uniformFloats[base + 5] = ty;
      uniformFloats[base + 6] = dx;
      uniformFloats[base + 7] = dy;
      slot++;
    };

    // texel_size is always that of the texture the pass samples from.
    writeSlot(1 / input.width, 1 / input.height, 0, 0); // threshold
    for (let i = 1; i < levels; i++) {
      writeSlot(1 / mips[i - 1].width, 1 / mips[i - 1].height, 0, 0); // downsample
    }
    for (let i = 0; i < levels; i++) {
      const tw = 1 / mips[i].width;
      const th = 1 / mips[i].height;
      writeSlot(tw, th, 1, 0); // horizontal blur
      writeSlot(tw, th, 0, 1); // vertical blur
    }
    for (let i = levels - 2; i >= 0; i--) {
      writeSlot(1 / mips[i + 1].width, 1 / mips[i + 1].height, 0, 0); // upsample
    }
    writeSlot(1 / blurTemps[0].width, 1 / blurTemps[0].height, 0, 0); // composite
    device.queue.writeBuffer(uniformBuffer, 0, uniformData);

    // Encode passes
    const encoder = device.createCommandEncoder({ label: 'bloom' });
    let passIdx = 0;
    const pass = (
      targetView: GPUTextureView,
      pipeline: GPURenderPipeline,
      texBG: GPUBindGroup
    ): void => {
      const p = encoder.beginRenderPass({
        colorAttachments: [
          {
            view: targetView,
            clearValue: { r: 0, g: 0, b: 0, a: 1 },
            loadOp: 'clear',
            storeOp: 'store'
          }
        ]
      });
      p.setPipeline(pipeline);
      p.setBindGroup(0, uniformBG, [passIdx * slotSize]);
      p.setBindGroup(1, texBG);
      p.draw(6);
      p.end();
      passIdx++;
    };

    // With a single level there is no upsample pass, so the finished bloom is
    // the fully blurred mips[0]; blurTemps[0] then only holds the horizontal
    // half of the blur. Otherwise the last upsample pass wrote blurTemps[0].
    const bloomView = levels > 1 ? blurTempViews[0] : mipViews[0];

    let thresholdBG = inputSingleBG;
    let compositeBG = compositeDualBG;
    if (cachedInputTex !== input || !thresholdBG || !compositeBG) {
      const inputView = input.createView();
      cachedInputTex = input;
      thresholdBG = inputSingleBG = makeSingleBG(inputView);
      compositeBG = compositeDualBG = makeDualBG(inputView, bloomView);
    }

    pass(mipViews[0], thresholdPipeline, thresholdBG);
    for (let i = 1; i < levels; i++) {
      pass(mipViews[i], downsamplePipeline, mipSingleBGs[i - 1]);
    }
    for (let i = 0; i < levels; i++) {
      pass(blurTempViews[i], blurPipeline, mipSingleBGs[i]); // H: mip → temp
      pass(mipViews[i], blurPipeline, blurTempSingleBGs[i]); // V: temp → mip
    }
    let upIdx = 0;
    for (let i = levels - 2; i >= 0; i--) {
      pass(blurTempViews[i], upsamplePipeline, upsampleDualBGs[upIdx++]);
    }
    pass(output.createView(), compositePipeline, compositeBG);

    device.queue.submit([encoder.finish()]);
  }

  function destroy(): void {
    for (const t of mips) t.destroy();
    for (const t of blurTemps) t.destroy();
    uniformBuffer.destroy();
  }

  return { render, destroy };
}
Documentation
bloom
Multi-level bloom post-processing. Extracts bright regions, builds a mipmap pyramid with Jimenez 13-tap downsampling, applies separable Gaussian blur at each level, upsamples with tent filtering, and composites the result back onto the original image.
API
createBloom(device, options?)
- device — GPUDevice
- options.format — Texture format. Default 'rgba8unorm'.
- options.levels — Number of mip levels in the bloom pyramid. Default 5.
Returns { render, destroy }.
render(input, output, params?)
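Applies bloom from input texture to output texture. Both must have TEXTURE_BINDING usage; output must also have RENDER_ATTACHMENT. The output texture's format must match options.format, because the final composite pipeline renders into it, and output must be a different texture from input, because the composite pass samples input while rendering to output.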
- params.threshold — Brightness threshold for bloom extraction. Default 0.8.
- params.strength — Bloom intensity multiplier. Default 1.0.
- params.radius — Blur radius scale. Default 0.5.
- params.softKnee — Soft threshold transition width. Default 0.5.
Further Reading
Rationale
Bloom simulates the way bright light bleeds into surrounding areas in cameras and the human eye. It's one of the most common post-processing effects in games and real-time graphics — any scene with HDR lighting, emissive materials, or bright light sources benefits from bloom. Without it, bright areas look flat and clipped.
This module implements a production-quality bloom pipeline used in shipped AAA games, in a single self-contained WebGPU file.
Original Research
Jorge Jimenez (2014) — Next Generation Post Processing in Call of Duty: Advanced Warfare — The presentation that introduced the 13-tap downsample filter used in this module. The key insight is using a weighted combination of 13 bilinear taps instead of a simple box filter, which prevents aliasing and fireflies in the bloom. https://www.iryoku.com/next-generation-post-processing-in-call-of-duty-advanced-warfare/
Kawase (2003) — Frame Buffer Postprocessing Effects in DOUBLE-S.T.E.A.L — An earlier approach using iterative dual-blur that's cheaper but lower quality. Good context for understanding the design space.
Existing Implementations
- Unity HDRP Bloom — Unity's built-in bloom uses the same Jimenez downsample approach.
- three.js UnrealBloomPass — Three.js implementation of multi-level bloom, good for cross-referencing. https://github.com/mrdoob/three.js/blob/dev/examples/jsm/postprocessing/UnrealBloomPass.js
Further Learning
- LearnOpenGL: Bloom — Step-by-step tutorial explaining the bloom pipeline with diagrams. https://learnopengl.com/Advanced-Lighting/Bloom
- Alexander Christensen: HDR Bloom — Practical guide to implementing bloom in a modern rendering pipeline, covers threshold, downsampling, and compositing.