Gaussian Blur
Separable Gaussian blur
Quick Start
import { createGaussianBlur } from './webgpu-market/gaussian-blur/gaussian-blur';
const blur = createGaussianBlur(device, { format: 'rgba8unorm' });
const output = device.createTexture({
size: [1024, 768],
format: 'rgba8unorm',
usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.TEXTURE_BINDING,
});
blur.apply(sourceTexture, output, { radius: 8, sigma: 4.0 });
// output now contains the blurred result
blur.destroy();

Source
// Separable Gaussian blur — one axis per pass.
//
// Each pass samples along a single direction ((1,0) horizontal or
// (0,1) vertical) using precomputed, normalized kernel weights.
// Hardware sampling provides bilinear filtering and clamp-to-edge
// behavior at the texture borders.
struct Uniforms {
  direction: vec2f,  // blur axis: (1,0) = horizontal, (0,1) = vertical
  texel_size: vec2f, // reciprocal of the texture dimensions
  radius: i32,       // number of taps on each side of the center
  _pad0: i32,
  _pad1: i32,
  _pad2: i32,
}
// Normalized kernel weights: weights[0] is the center tap, weights[i]
// applies symmetrically at offsets ±i. At most 33 entries are used
// (center + 32 per side; symmetry means one weight covers both sides).
@group(0) @binding(0) var<uniform> u: Uniforms;
@group(0) @binding(1) var<storage, read> weights: array<f32>;
@group(0) @binding(2) var source_tex: texture_2d<f32>;
@group(0) @binding(3) var source_sampler: sampler;
struct VertexOutput {
  @builtin(position) position: vec4f,
  @location(0) uv: vec2f,
}
// Single fullscreen triangle: three oversized vertices whose
// clip-space footprint covers the whole viewport.
@vertex
fn vs(@builtin(vertex_index) i: u32) -> VertexOutput {
  let x = f32((i << 1u) & 2u);
  let y = f32(i & 2u);
  var out: VertexOutput;
  out.position = vec4f(x * 2.0 - 1.0, y * 2.0 - 1.0, 0.0, 1.0);
  // Flip V so uv (0,0) is the top-left of the texture.
  out.uv = vec2f(x, 1.0 - y);
  return out;
}
@fragment
fn fs(in: VertexOutput) -> @location(0) vec4f {
  // Center tap (weight index 0).
  var acc = textureSample(source_tex, source_sampler, in.uv) * weights[0];
  // Mirrored taps on either side of the center share one weight each.
  let delta = u.direction * u.texel_size;
  for (var tap = 1i; tap <= u.radius; tap++) {
    let shift = delta * f32(tap);
    let w = weights[tap];
    acc += textureSample(source_tex, source_sampler, in.uv + shift) * w;
    acc += textureSample(source_tex, source_sampler, in.uv - shift) * w;
  }
  return acc;
}
// Gaussian Blur
// Two-pass separable Gaussian blur using render passes.
// Takes a source GPUTexture and writes the blurred result to a caller-provided target GPUTexture.
//
// Default WGSL loading uses a ?raw import (works with Vite, esbuild, Webpack).
// Alternative: load via fetch — see README.md for details.
import shaderSource from './gaussian-blur.wgsl?raw';
/** Creation-time options for {@link createGaussianBlur}. */
export interface GaussianBlurOptions {
  /** Texture format of the pipeline and internal intermediate texture; must match the source. Defaults to 'rgba8unorm'. */
  format?: GPUTextureFormat;
}
/** Per-call options for {@link GaussianBlur.apply}. */
export interface GaussianBlurApplyOptions {
  /** Blur radius in pixels, clamped to [0, 32]. Defaults to 8. 0 renders the source unblurred. */
  radius?: number;
  /** Gaussian standard deviation. Defaults to radius / 2. */
  sigma?: number;
}
/** Handle returned by {@link createGaussianBlur}. */
export interface GaussianBlur {
  /** Blurs `source` (needs TEXTURE_BINDING usage) and writes the result into `target` (needs RENDER_ATTACHMENT usage). */
  apply(source: GPUTexture, target: GPUTexture, options?: GaussianBlurApplyOptions): void;
  /** Releases internal GPU resources; never destroys caller-owned textures. */
  destroy(): void;
}
// Largest supported blur radius (taps per side); the kernel holds MAX_RADIUS + 1 weights.
const MAX_RADIUS = 32;
// Uniform buffer: vec2f direction, vec2f texel_size, i32 radius, 3x i32 pad = 32 bytes
const UNIFORM_SIZE = 32;
// Compute normalized Gaussian kernel weights for the given radius and sigma.
//
// Returns an array of (radius + 1) values: weights[0] is the center weight,
// weights[i] is the symmetric weight applied at offsets ±i. The weights are
// normalized so center + 2 * (each side weight) sums to 1.0, preserving
// overall brightness.
//
// `radius` is floored to an integer: with a fractional radius the loop bound
// `i < size` ran one extra iteration past the end of the typed array (the
// write was silently dropped) while still inflating `sum`, mis-normalizing
// the kernel. `sigma` is clamped away from zero to avoid division by zero.
function computeKernel(radius: number, sigma: number): Float32Array {
  const safeSigma = Math.max(sigma, 0.0001);
  const size = Math.floor(radius) + 1;
  const kernel = new Float32Array(size);
  let sum = 0;
  for (let i = 0; i < size; i++) {
    const w = Math.exp((-i * i) / (2 * safeSigma * safeSigma));
    kernel[i] = w;
    sum += i === 0 ? w : w * 2; // center counted once, sides counted twice
  }
  // Normalize so the full symmetric kernel sums to 1.
  for (let i = 0; i < size; i++) {
    kernel[i] /= sum;
  }
  return kernel;
}
/**
 * Create a two-pass separable Gaussian blur.
 *
 * Pass 1 renders `source` horizontally blurred into an internally managed
 * intermediate texture; pass 2 renders that intermediate vertically blurred
 * into the caller's `target`. Both passes share one pipeline and draw a
 * single fullscreen triangle.
 *
 * @param device  Device used to create all GPU resources.
 * @param options `format` must match the textures passed to `apply`
 *                (default 'rgba8unorm').
 * @returns A {@link GaussianBlur} exposing `apply` and `destroy`.
 */
export function createGaussianBlur(
  device: GPUDevice,
  options: GaussianBlurOptions = {}
): GaussianBlur {
  const format = options.format ?? 'rgba8unorm';
  const shaderModule = device.createShaderModule({ code: shaderSource });
  // Linear filtering gives hardware bilinear sampling; clamp-to-edge defines
  // behavior when kernel taps fall outside the image.
  const sampler = device.createSampler({
    magFilter: 'linear',
    minFilter: 'linear',
    addressModeU: 'clamp-to-edge',
    addressModeV: 'clamp-to-edge'
  });
  // One uniform buffer per pass so both passes can be encoded into a single
  // command buffer without the vertical write clobbering the horizontal one.
  const hUniformBuffer = device.createBuffer({
    size: UNIFORM_SIZE,
    usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST
  });
  const vUniformBuffer = device.createBuffer({
    size: UNIFORM_SIZE,
    usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST
  });
  // Kernel weights shared by both passes: MAX_RADIUS + 1 floats
  // (center tap + one weight per symmetric offset).
  const weightsBuffer = device.createBuffer({
    size: (MAX_RADIUS + 1) * 4,
    usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST
  });
  const bindGroupLayout = device.createBindGroupLayout({
    entries: [
      {
        binding: 0,
        visibility: GPUShaderStage.FRAGMENT | GPUShaderStage.VERTEX,
        buffer: { type: 'uniform' }
      },
      { binding: 1, visibility: GPUShaderStage.FRAGMENT, buffer: { type: 'read-only-storage' } },
      { binding: 2, visibility: GPUShaderStage.FRAGMENT, texture: { sampleType: 'float' } },
      { binding: 3, visibility: GPUShaderStage.FRAGMENT, sampler: {} }
    ]
  });
  const pipelineLayout = device.createPipelineLayout({ bindGroupLayouts: [bindGroupLayout] });
  const pipeline = device.createRenderPipeline({
    layout: pipelineLayout,
    vertex: { module: shaderModule, entryPoint: 'vs' },
    fragment: {
      module: shaderModule,
      entryPoint: 'fs',
      targets: [{ format }]
    },
    primitive: { topology: 'triangle-list' }
  });
  // Hoisted uniform staging data to avoid per-frame allocations; both typed
  // arrays view the same bytes (floats at 0-3, radius as i32 at index 4).
  const uniformData = new ArrayBuffer(UNIFORM_SIZE);
  const uniformF32 = new Float32Array(uniformData);
  const uniformI32 = new Int32Array(uniformData);
  // Intermediate texture and the vertical bind group that samples it.
  // The bind group references only the intermediate's view (the target is
  // always a render attachment, never a binding), so it is invalidated
  // exactly when the intermediate is recreated — not per target.
  let intermediateTexture: GPUTexture | null = null;
  let verticalBindGroup: GPUBindGroup | null = null;
  let lastWidth = 0;
  let lastHeight = 0;
  // (Re)create the intermediate texture when the source dimensions change.
  function ensureIntermediate(width: number, height: number): void {
    if (width === lastWidth && height === lastHeight && intermediateTexture) return;
    intermediateTexture?.destroy();
    intermediateTexture = device.createTexture({
      size: [width, height],
      format,
      usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.TEXTURE_BINDING
    });
    lastWidth = width;
    lastHeight = height;
    // The cached bind group references the old intermediate's view.
    verticalBindGroup = null;
  }
  // Stage and upload one pass's uniforms (direction, texel size, radius).
  function writeUniforms(
    buffer: GPUBuffer,
    direction: [number, number],
    width: number,
    height: number,
    radius: number
  ): void {
    uniformF32[0] = direction[0];
    uniformF32[1] = direction[1];
    uniformF32[2] = 1.0 / width;
    uniformF32[3] = 1.0 / height;
    uniformI32[4] = radius;
    device.queue.writeBuffer(buffer, 0, uniformData);
  }
  // Bind group for a pass that samples the given texture using the
  // horizontal-pass uniforms.
  function createSourceBindGroup(sourceTexture: GPUTexture): GPUBindGroup {
    return device.createBindGroup({
      layout: bindGroupLayout,
      entries: [
        { binding: 0, resource: { buffer: hUniformBuffer } },
        { binding: 1, resource: { buffer: weightsBuffer } },
        { binding: 2, resource: sourceTexture.createView() },
        { binding: 3, resource: sampler }
      ]
    });
  }
  // Lazily (re)build the cached vertical bind group against the current
  // intermediate texture.
  function ensureVerticalBindGroup(): GPUBindGroup {
    if (!verticalBindGroup) {
      verticalBindGroup = device.createBindGroup({
        layout: bindGroupLayout,
        entries: [
          { binding: 0, resource: { buffer: vUniformBuffer } },
          { binding: 1, resource: { buffer: weightsBuffer } },
          { binding: 2, resource: intermediateTexture!.createView() },
          { binding: 3, resource: sampler }
        ]
      });
    }
    return verticalBindGroup;
  }
  /**
   * Blur `source` into `target`. `radius` is sanitized here: floored to an
   * integer (computeKernel and the shader's i32 loop both assume one),
   * clamped to [0, MAX_RADIUS], and replaced by the default for
   * NaN/Infinity — previously a fractional radius produced a
   * mis-normalized kernel (visible darkening).
   */
  function apply(
    source: GPUTexture,
    target: GPUTexture,
    applyOptions: GaussianBlurApplyOptions = {}
  ): void {
    const width = source.width;
    const height = source.height;
    const requested = applyOptions.radius ?? 8;
    const radius = Number.isFinite(requested)
      ? Math.min(Math.max(Math.floor(requested), 0), MAX_RADIUS)
      : 8;
    const sigma = applyOptions.sigma ?? radius / 2;
    if (radius === 0) {
      // No blur — single pass-through render using a weight of 1.0 at center.
      device.queue.writeBuffer(weightsBuffer, 0, new Float32Array([1.0]));
      writeUniforms(hUniformBuffer, [1, 0], width, height, 0);
      const bindGroup = createSourceBindGroup(source);
      const encoder = device.createCommandEncoder();
      const pass = encoder.beginRenderPass({
        colorAttachments: [
          {
            view: target.createView(),
            loadOp: 'clear',
            storeOp: 'store'
          }
        ]
      });
      pass.setPipeline(pipeline);
      pass.setBindGroup(0, bindGroup);
      pass.draw(3);
      pass.end();
      device.queue.submit([encoder.finish()]);
      return;
    }
    ensureIntermediate(width, height);
    // Upload kernel weights (writeBuffer copies immediately, so the same
    // staging data can be reused before submission).
    const kernel = computeKernel(radius, sigma);
    device.queue.writeBuffer(weightsBuffer, 0, kernel);
    const encoder = device.createCommandEncoder();
    // Pass 1: Horizontal blur (source → intermediate)
    writeUniforms(hUniformBuffer, [1, 0], width, height, radius);
    const hBindGroup = createSourceBindGroup(source);
    const hPass = encoder.beginRenderPass({
      colorAttachments: [
        {
          view: intermediateTexture!.createView(),
          loadOp: 'clear',
          storeOp: 'store'
        }
      ]
    });
    hPass.setPipeline(pipeline);
    hPass.setBindGroup(0, hBindGroup);
    hPass.draw(3);
    hPass.end();
    // Pass 2: Vertical blur (intermediate → target)
    writeUniforms(vUniformBuffer, [0, 1], width, height, radius);
    const vBindGroup = ensureVerticalBindGroup();
    const vPass = encoder.beginRenderPass({
      colorAttachments: [
        {
          view: target.createView(),
          loadOp: 'clear',
          storeOp: 'store'
        }
      ]
    });
    vPass.setPipeline(pipeline);
    vPass.setBindGroup(0, vBindGroup);
    vPass.draw(3);
    vPass.end();
    device.queue.submit([encoder.finish()]);
  }
  /** Release internal resources; caller-owned textures are untouched. */
  function destroy(): void {
    intermediateTexture?.destroy();
    intermediateTexture = null;
    verticalBindGroup = null;
    hUniformBuffer.destroy();
    vUniformBuffer.destroy();
    weightsBuffer.destroy();
  }
  return { apply, destroy };
}
Documentation
Gaussian Blur
Separable Gaussian blur using two render passes. Takes a source GPUTexture and writes the blurred result to a caller-provided target GPUTexture. Uses fragment shaders with hardware texture sampling for bilinear filtering and edge clamping.
API
createGaussianBlur(device, options?)
Returns a GaussianBlur instance.
| Option | Type | Default | Description |
|---|---|---|---|
| format | GPUTextureFormat | 'rgba8unorm' | Texture format (must match source) |
blur.apply(source, target, options?)
Blurs the source texture and writes the result to the target texture.
- source — GPUTexture to read from (must have TEXTURE_BINDING usage)
- target — GPUTexture to write to (must have RENDER_ATTACHMENT usage)
| Option | Type | Default | Description |
|---|---|---|---|
| radius | number | 8 | Blur radius in pixels (0–32) |
| sigma | number | radius / 2 | Gaussian standard deviation |
When radius is 0, the source is rendered to the target unchanged (a single pass-through render pass; no blur is applied).
blur.destroy()
Releases internal textures and buffers. Does not destroy source or target textures.
Further Reading
Resources on Gaussian blur, separable convolutions, and GPU image filtering.
Core Theory
Heckbert, "Filtering by Repeated Integration" (SIGGRAPH 1986) Foundational paper on efficient image filtering techniques, including the separability of Gaussian kernels that makes two-pass blur possible. https://dl.acm.org/doi/10.1145/15886.15921
Deriche, "Recursively Implementing the Gaussian and Its Derivatives" (1993) Introduces recursive (IIR) Gaussian filtering that achieves O(1) cost per pixel regardless of kernel size. A useful alternative for very large radii. https://inria.hal.science/inria-00074778/document
GPU Implementation
GPU Gems 3, Chapter 40: "Incremental Computation of the Gaussian" Practical GPU implementation techniques for Gaussian filtering, including incremental weight computation and shared memory optimizations. https://developer.nvidia.com/gpugems/gpugems3/part-vi-gpu-computing/chapter-40-incremental-computation-gaussian
Efficient Gaussian Blur with Linear Sampling (Rastergrid) Explains the technique of sampling between texels to halve the number of texture fetches in a Gaussian blur, leveraging hardware bilinear filtering. https://www.rastergrid.com/blog/2010/09/efficient-gaussian-blur-with-linear-sampling/
Intel, "An Investigation of Fast Real-Time GPU-Based Image Blur Algorithms" Comprehensive comparison of GPU blur techniques: box blur, Gaussian, Kawase, dual filtering. Benchmarks and trade-offs for each approach. https://www.intel.com/content/www/us/en/developer/articles/technical/an-investigation-of-fast-real-time-gpu-based-image-blur-algorithms.html
Post-Processing Applications
Kawase, "Frame Buffer Postprocessing Effects in DOUBLE-S.T.E.A.L" (GDC 2003) Introduces the Kawase blur — a multi-pass approach using progressively larger sample offsets that approximates Gaussian blur with fewer passes. Common in game engines. https://www.gdcvault.com/play/1022665/Frame-Buffer-Postprocessing-Effects-in
Jimenez, "Next Generation Post Processing in Call of Duty: Advanced Warfare" (SIGGRAPH 2014) State of the art in real-time post-processing, including bloom, depth of field, and motion blur. Shows how Gaussian blur fits into a modern rendering pipeline. https://www.iryoku.com/next-generation-post-processing-in-call-of-duty-advanced-warfare/
Bloom
- Karis, "Real Shading in Unreal Engine 4" (SIGGRAPH 2013) Describes Unreal's bloom pipeline: threshold bright pixels, progressively downsample with blur, then composite. The Gaussian blur module can serve as the blur step. https://blog.selfshadow.com/publications/s2013-shading-course/karis/s2013_pbs_epic_notes_v2.pdf
General References
Wikipedia, "Gaussian blur" Clear mathematical description of the Gaussian function, separability proof, and relationship to the normal distribution. https://en.wikipedia.org/wiki/Gaussian_blur
WebGPU Specification — Render Passes The official spec for render pass encoding, color attachments, load/store operations, and fragment shader outputs used by this module. https://www.w3.org/TR/webgpu/#render-passes