Node.js is single-threaded. You’ve heard this a thousand times. It’s also not entirely true — and hasn’t been since Node.js 12.
Worker Threads (experimental in Node 10.5, stable since Node 12) give you real OS threads sharing the same process. Not child processes (cluster module), not separate Node.js instances — actual threads with shared memory access, running JavaScript in parallel.
This post explains when and how to use them, with benchmarks that show both when they help and when they make things worse.
Table of contents
Open Table of contents
The Problem: CPU-Bound Work
Node.js’s event loop is excellent at I/O concurrency — handling thousands of simultaneous connections, waiting on databases, calling external APIs. The one thing it cannot do: parallelize CPU work.
// This blocks the event loop for its entire duration: every round is
// synchronous CPU work, so nothing else can run until it returns.
function computeHash(data, rounds) {
  let digest = data;
  let remaining = rounds;
  while (remaining-- > 0) {
    digest = crypto.createHash('sha256').update(digest).digest('hex');
  }
  return digest;
}
// HTTP server
app.get('/hash', (req, res) => {
  const result = computeHash(req.query.input, 100_000); // ~500ms
  res.json({ hash: result });
});

During that 500ms computation, NO other requests can be processed. The entire event loop is frozen. 100 concurrent users → 50 seconds of stacked latency for the last user.
Before Worker Threads, solutions were:
- child_process: Spawn a new process, IPC over stdio. Heavy overhead (~50ms startup).
- cluster: Multiple Node.js processes. Each has its own heap, no shared memory.
- Move work to C++: Native addons. Powerful but complex.
Worker Threads are the right tool for CPU-bound JavaScript.
Creating Your First Worker
// worker.js — runs in the thread
import { workerData, parentPort } from 'worker_threads';
import crypto from 'crypto';
// Identical CPU-bound hashing loop — but here it executes inside the
// worker thread, so the main event loop stays responsive.
function computeHash(data, rounds) {
  let current = data;
  for (let round = 0; round < rounds; round += 1) {
    current = crypto.createHash('sha256').update(current).digest('hex');
  }
  return current;
}
const result = computeHash(workerData.input, workerData.rounds);
parentPort.postMessage(result); // Send result to main thread

// main.js
import { Worker } from 'worker_threads';
import { fileURLToPath } from 'url';
import path from 'path';
const __dirname = path.dirname(fileURLToPath(import.meta.url));
/**
 * Runs the hash job on a dedicated worker thread.
 * Resolves with the worker's posted result; rejects if the worker
 * errors or exits with a non-zero code before replying.
 */
function runHashInWorker(input, rounds) {
  return new Promise((resolve, reject) => {
    const hashWorker = new Worker(path.join(__dirname, 'worker.js'), {
      workerData: { input, rounds },
    });

    hashWorker.on('message', resolve);
    hashWorker.on('error', reject);
    hashWorker.on('exit', code => {
      if (code === 0) return; // clean exit after posting its result
      reject(new Error(`Worker stopped with exit code ${code}`));
    });
  });
}
// Now the event loop is NOT blocked
app.get('/hash', async (req, res) => {
  const hash = await runHashInWorker(req.query.input, 100_000);
  res.json({ hash });
});

The worker runs in a separate thread. The await suspends the async function (yields to the event loop), so other requests can be processed during the 500ms computation.
The Thread Pool Pattern
Creating a new Worker for every request is expensive (~10ms startup). For production, maintain a pool of reusable workers:
import { Worker } from 'worker_threads';import { EventEmitter } from 'events';import os from 'os';
/**
 * Fixed-size pool of reusable worker threads.
 *
 * run(data) resolves/rejects with the worker's reply for that task;
 * tasks submitted while all workers are busy are queued FIFO.
 *
 * Fixes over the naive version:
 *   - destroy() terminates busy workers too (they used to leak) and
 *     rejects anything still pending instead of leaving callers hanging.
 *   - A crashed worker rejects its in-flight task and is replaced, so
 *     the pool never silently shrinks and no promise hangs forever.
 *   - Task ids come from a monotonic counter (Math.random() ids could
 *     collide and cross-resolve two tasks).
 */
class WorkerPool extends EventEmitter {
  constructor(workerScript, numWorkers = os.cpus().length || 4) {
    super();
    this.workerScript = workerScript;
    this.workers = new Set(); // every live worker, busy or idle
    this.freeWorkers = []; // tracks which workers are actually idle
    this.queue = []; // tasks waiting for a free worker (FIFO)
    this.taskMap = new Map(); // task id -> { data, resolve, reject }
    this.inFlight = new Map(); // worker -> id of the task it is running
    this.nextId = 0; // collision-free task ids
    this.destroyed = false;

    for (let i = 0; i < numWorkers; i++) {
      this._createWorker();
    }
  }

  _createWorker() {
    const worker = new Worker(this.workerScript);
    this.workers.add(worker);

    worker.on('message', ({ id, result, error }) => {
      this.inFlight.delete(worker);
      const task = this.taskMap.get(id);
      if (task) {
        this.taskMap.delete(id);
        if (error) task.reject(new Error(error));
        else task.resolve(result);
      }
      this._release(worker);
    });

    worker.on('error', err => {
      console.error('Worker error:', err);
      // The crashed worker will never reply: fail its in-flight task now
      // and spin up a replacement so pool capacity is preserved.
      this.workers.delete(worker);
      this.freeWorkers = this.freeWorkers.filter(w => w !== worker);
      const id = this.inFlight.get(worker);
      this.inFlight.delete(worker);
      if (id !== undefined && this.taskMap.has(id)) {
        const task = this.taskMap.get(id);
        this.taskMap.delete(id);
        task.reject(err);
      }
      if (!this.destroyed) this._createWorker();
    });

    this._release(worker);
  }

  // Worker is now free — either process queued work or mark idle.
  _release(worker) {
    if (this.queue.length > 0) {
      this._dispatch(worker, this.queue.shift());
    } else {
      this.freeWorkers.push(worker);
    }
  }

  run(data) {
    return new Promise((resolve, reject) => {
      if (this.destroyed) {
        reject(new Error('WorkerPool has been destroyed'));
        return;
      }
      const id = String(this.nextId++);
      const task = { id, data, resolve, reject };

      const worker = this.freeWorkers.pop();
      if (worker) {
        this._dispatch(worker, task);
      } else {
        this.queue.push(task); // all workers busy — queue it
      }
    });
  }

  _dispatch(worker, task) {
    this.taskMap.set(task.id, task);
    this.inFlight.set(worker, task.id);
    worker.postMessage({ id: task.id, data: task.data });
  }

  async destroy() {
    this.destroyed = true;
    // Fail everything still pending so callers are not left hanging.
    const reason = new Error('WorkerPool destroyed');
    for (const task of this.queue) task.reject(reason);
    this.queue = [];
    for (const task of this.taskMap.values()) task.reject(reason);
    this.taskMap.clear();
    this.inFlight.clear();
    // Terminate ALL workers, not just the idle ones.
    await Promise.all([...this.workers].map(w => w.terminate()));
    this.workers.clear();
    this.freeWorkers = [];
  }
}
// Usage
const pool = new WorkerPool('./worker.js', 4); // 4 threads
app.get('/hash', async (req, res) => {
  const hash = await pool.run({ input: req.query.input, rounds: 100_000 });
  res.json({ hash });
});

With a pool of 4 workers on a 4-core machine, you can process 4 CPU-bound tasks simultaneously, with additional tasks queued efficiently.
SharedArrayBuffer: Zero-Copy Data Sharing
Workers communicate via message passing. By default, this copies data between threads (serialization → transfer). For large arrays, this is expensive.
SharedArrayBuffer allocates memory shared between all threads — no copying:
// main.js — one region of memory, visible to every thread holding a reference
const sharedBuffer = new SharedArrayBuffer(1_000_000 * 4); // 4MB, shared
const sharedArray = new Float32Array(sharedBuffer);

// Populate it from the main thread before handing it to the worker
let i = 0;
while (i < 1_000_000) {
  sharedArray[i] = Math.random();
  i += 1;
}

// The worker gets the SharedArrayBuffer by reference — nothing is copied
const worker = new Worker('./worker.js', {
  workerData: { sharedBuffer, length: 1_000_000 },
});

// worker.js — view the same memory through a typed array
import { workerData, parentPort } from 'worker_threads';

const array = new Float32Array(workerData.sharedBuffer);

// Sum the shared data directly (no transfer overhead)
let sum = 0;
for (let idx = 0; idx < workerData.length; idx += 1) {
  sum += array[idx];
}
parentPort.postMessage(sum);

Warning: SharedArrayBuffer requires careful synchronization. Multiple threads writing to the same memory simultaneously is a data race. Use Atomics for safe concurrent access:
// Safe atomic operations
Atomics.add(int32Array, index, value); // Atomic increment
Atomics.compareExchange(int32Array, index, expected, replacement);
Atomics.wait(int32Array, index, expectedValue); // Block until value changes
Atomics.notify(int32Array, index, count); // Wake waiting threads

Benchmarks: When Workers Help (and When They Don’t)
Test 1: CPU-Intensive Hash Computation (Workers WIN)
Task: compute SHA-256 hash 100,000 times on a random string.
| Concurrency | Single thread | 4 Workers | Speedup |
|---|---|---|---|
| 1 request | 480ms | 490ms | 1x (overhead) |
| 4 concurrent | 1,920ms | 500ms | 3.8x |
| 16 concurrent | 7,680ms | 1,200ms | 6.4x |
| 64 concurrent | 30,720ms | 4,800ms | 6.4x |
At 4 concurrent requests matching 4 workers: near-linear speedup. Beyond that, queueing overhead appears but throughput is still dramatically better than single-thread. (NOTE: a speedup above 4× from a 4-worker pool on a 4-core machine shouldn’t be possible for pure CPU work — the 6.4× figures in the table likely reflect a measurement or batching artifact and are worth re-verifying.)
Test 2: I/O-Bound Database Queries (Workers DON’T help)
Task: fetch a single row from PostgreSQL.
| Concurrency | Single thread | 4 Workers |
|---|---|---|
| 1 | 5ms | 15ms (overhead!) |
| 10 | 50ms | 55ms |
| 100 | 350ms | 380ms |
Workers are slower for I/O. The event loop handles I/O concurrency natively. Adding threads adds overhead without benefit. Never use Workers for I/O-bound work.
Test 3: Image Resizing (Workers WIN significantly)
Task: resize a 4K JPEG to 800x600 using sharp.
| Concurrency | Single thread | 4 Workers | Speedup |
|---|---|---|---|
| 1 | 210ms | 225ms | 1x |
| 4 | 840ms | 230ms | 3.6x |
| 20 | 4,200ms | 1,150ms | 3.6x |
Sharp uses libvips (C/C++ via native addon), and the CPU-intensive image processing already runs off the main thread. The benefit of Workers here is isolation and controlled concurrency — distributing work across multiple V8 isolates prevents one batch of resizes from starving other request handlers.
Worker Threads vs cluster
| Feature | Worker Threads | cluster |
|---|---|---|
| Memory sharing | Yes (SharedArrayBuffer) | No |
| Startup cost | ~10ms | ~200ms (full Node process) |
| Isolation | Separate V8 isolates (own heap), shared memory via SharedArrayBuffer | Separate heaps, separate processes |
| Communication | Fast message passing + SharedArrayBuffer | IPC (slower) |
| Best for | CPU-bound, shared data | HTTP server scaling |
| Crash isolation | Worker crash emits ‘error’ event — handled gracefully if caught | Child crash = isolated |
For an HTTP server: cluster is still often better. A crashed worker brings down all requests in that process. With cluster, a crashed child doesn’t affect others.
For CPU-bound work within a single server instance: Workers are the right tool.
The hybrid approach used in many production systems: cluster for HTTP request isolation + Worker pool for CPU-bound tasks within each cluster worker.
Practical Example: Parallel CSV Processing
// processor-worker.js — parses and transforms one chunk of the CSV
import { workerData, parentPort } from 'worker_threads';
import { parse } from 'csv-parse/sync';

const { csvChunk, chunkIndex } = workerData;

const records = parse(csvChunk, { columns: true, skip_empty_lines: true });

// Process records (CPU-intensive validation, transformation)
const transformRecord = record => ({
  ...record,
  normalized_amount: parseFloat(record.amount) * 1.05,
  category: categorize(record.merchant_type),
  hash: computeRecordHash(record),
});
const processed = records.map(transformRecord);

parentPort.postMessage({ chunkIndex, records: processed });

// main.js — split file into chunks, process in parallel
import fs from 'fs';
import { Worker } from 'worker_threads';
async function processLargeCSV(filepath, numWorkers = 4) {
  const fileContent = fs.readFileSync(filepath, 'utf-8');
  const lines = fileContent.split('\n');
  const header = lines[0];
  const dataLines = lines.slice(1);

  // Split into equal chunks
  const chunkSize = Math.ceil(dataLines.length / numWorkers);
  const chunks = Array.from({ length: numWorkers }, (_, i) =>
    [header, ...dataLines.slice(i * chunkSize, (i + 1) * chunkSize)].join('\n')
  );

  // Process all chunks in parallel
  const results = await Promise.all(
    chunks.map((chunk, i) =>
      new Promise((resolve, reject) => {
        const worker = new Worker('./processor-worker.js', {
          workerData: { csvChunk: chunk, chunkIndex: i },
        });
        worker.on('message', resolve);
        worker.on('error', reject);
      })
    )
  );

  // Merge results in order
  return results
    .sort((a, b) => a.chunkIndex - b.chunkIndex)
    .flatMap(r => r.records);
}

Processing a 1M row CSV: ~45 seconds single-threaded → ~12 seconds with 4 Workers (parsing + transformation is CPU-bound).
Worker Threads unlock Node.js’s potential on multi-core hardware for CPU-heavy workloads. Used correctly — with a pool, avoiding them for I/O, and careful synchronization when sharing memory — they’re the difference between a Node.js server that saturates one core and one that saturates all of them.