The official Node SDK for ElevenLabs providing comprehensive text-to-speech, voice synthesis, conversational AI, and audio processing capabilities
npx @tessl/cli install tessl/npm-elevenlabs--elevenlabs-js@2.24.00
# ElevenLabs Node SDK

## Installation

```bash
npm install @elevenlabs/elevenlabs-js
# Minimum Node: 18.0.0
```

## Client Initialization

```typescript
import { ElevenLabsClient } from "@elevenlabs/elevenlabs-js";

const client = new ElevenLabsClient({
  apiKey: "YOUR_API_KEY" // or process.env.ELEVENLABS_API_KEY
});
```

### Client Configuration

```typescript { .api }
interface ElevenLabsClient.Options {
  apiKey?: string; // Required via option or ELEVENLABS_API_KEY env var
  environment?: ElevenLabsEnvironment | string;
  baseUrl?: string;
  headers?: Record<string, string | null | undefined>;
}

enum ElevenLabsEnvironment {
  Production = "https://api.elevenlabs.io",
  ProductionUs = "https://api.us.elevenlabs.io",
  ProductionEu = "https://api.eu.residency.elevenlabs.io",
  ProductionIndia = "https://api.in.residency.elevenlabs.io"
}
```

## Core Imports

```typescript
import {
  ElevenLabsClient,
  play,
  stream,
  RealtimeConnection,
  RealtimeEvents,
  AudioFormat,
  CommitStrategy,
  type ElevenLabs
} from "@elevenlabs/elevenlabs-js";

// Access types via namespace
const voice: ElevenLabs.Voice = ...;
const config: ElevenLabs.AgentConfig = ...;
```

**Import limitations**: Message types (`Config`, `InputAudioChunk`, `WordsItem`, etc.) and specific error types (`ErrorMessage`, `AuthErrorMessage`, `QuotaExceededErrorMessage`) are not exported from the package root.

## Client Resources

```typescript { .api }
class ElevenLabsClient {
  readonly history: History;
  readonly textToSpeech: TextToSpeech;
  readonly textToDialogue: TextToDialogue;
  readonly textToSoundEffects: TextToSoundEffects;
  readonly textToVoice: TextToVoice;
  readonly speechToSpeech: SpeechToSpeech;
  readonly speechToText: SpeechToText; // Returns SpeechToText wrapper with .realtime
  readonly voices: Voices;
  readonly audioIsolation: AudioIsolation;
  readonly forcedAlignment: ForcedAlignment;
  readonly conversationalAi: ConversationalAi;
  readonly music: Music; // Returns Music wrapper with multipart support
  readonly dubbing: Dubbing;
  readonly studio: Studio;
  readonly models: Models;
  readonly user: User;
  readonly workspace: Workspace;
  readonly usage: Usage;
  readonly samples: Samples;
  readonly audioNative: AudioNative;
  readonly pronunciationDictionaries: PronunciationDictionaries;
  readonly serviceAccounts: ServiceAccounts;
  readonly webhooks: Webhooks;
  readonly tokens: Tokens;
}
```

## Request Options

```typescript { .api }
interface RequestOptions {
  timeoutInSeconds?: number; // Default: 60
  maxRetries?: number; // Default: 2
  abortSignal?: AbortSignal;
  apiKey?: string; // Override client API key
  queryParams?: Record<string, unknown>;
  headers?: Record<string, string | null | undefined>;
}
```

Auto-retry applies to: 408 (timeout), 409 (conflict), 429 (rate limit), 5XX (server errors).

## Quick Reference

```typescript
// Basic TTS
const audio = await client.textToSpeech.convert("voiceId", {
  text: "Hello",
  modelId: "eleven_multilingual_v2"
});

// Streaming TTS
const stream = await client.textToSpeech.stream("voiceId", {
  text: "Hello",
  modelId: "eleven_flash_v2_5"
});

// Voice cloning
const voice = await client.voices.add({
  name: "Custom Voice",
  files: [audioFile1, audioFile2]
});

// Realtime transcription
const connection = await client.speechToText.realtime.connect({
  audioFormat: AudioFormat.PCM_16000,
  language: "en"
});
connection.on(RealtimeEvents.COMMITTED_TRANSCRIPT, (msg) => {
  console.log(msg.text);
});
connection.send({ audioBase64: base64Audio });
```

## Comprehensive Examples

### Complete TTS Workflow with Error Handling

```typescript
import { ElevenLabsClient, ElevenLabsError } from "@elevenlabs/elevenlabs-js";
import * as fs from "fs";

const client = new ElevenLabsClient({
  apiKey: process.env.ELEVENLABS_API_KEY
});

async function generateAudiobook(text: string, outputPath: string) {
  try {
    // Get available voices
    const voices = await client.voices.getAll();
    const narratorVoice = voices.voices.find(v =>
      v.name?.toLowerCase().includes("narrator")
    );

    if (!narratorVoice) {
      throw new Error("Narrator voice not found");
    }

    // Split long text into chunks (respecting max length)
    const maxLength = 5000; // Conservative limit
    const chunks = text.match(new RegExp(`.{1,${maxLength}}(?=\\s|$)`, 'g')) || [];

    const audioChunks: Buffer[] = [];

    // Generate audio for each chunk with continuity
    for (let i = 0; i < chunks.length; i++) {
      const chunk = chunks[i];
      const previousText = i > 0 ? chunks[i - 1].slice(-200) : undefined;
      const nextText = i < chunks.length - 1 ? chunks[i + 1].slice(0, 200) : undefined;

      const audioStream = await client.textToSpeech.convert(
        narratorVoice.voiceId,
        {
          text: chunk,
          modelId: "eleven_multilingual_v2",
          previousText,
          nextText,
          voiceSettings: {
            stability: 0.7, // Higher stability for consistent narration
            similarityBoost: 0.75,
            useSpeakerBoost: true
          },
          outputFormat: "mp3_44100_128"
        },
        {
          timeoutInSeconds: 120, // Longer timeout for longer text
          maxRetries: 3
        }
      );

      // Collect stream data (named `streamChunks` so it does not shadow the
      // outer `chunks` array — shadowing would make `chunks[i]` above throw
      // a temporal-dead-zone ReferenceError)
      const reader = audioStream.getReader();
      const streamChunks: Uint8Array[] = [];
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;
        streamChunks.push(value);
      }
      audioChunks.push(Buffer.concat(streamChunks));

      // Small delay to avoid rate limiting
      await new Promise(resolve => setTimeout(resolve, 100));
    }

    // Combine all chunks
    const finalAudio = Buffer.concat(audioChunks);
    fs.writeFileSync(outputPath, finalAudio);
    console.log(`Audiobook saved to ${outputPath}`);

  } catch (error) {
    if (error instanceof ElevenLabsError) {
      if (error.statusCode === 429) {
        console.error("Rate limit exceeded. Consider implementing exponential backoff.");
      } else if (error.statusCode === 422) {
        console.error("Invalid request:", error.body);
      } else {
        console.error(`API error ${error.statusCode}:`, error.message);
      }
    } else {
      console.error("Unexpected error:", error);
    }
    throw error;
  }
}
```

### Voice Cloning with Quality Checks

```typescript
import { createReadStream } from "fs";
import * as fs from "fs";
import { ElevenLabsClient, ElevenLabsError } from "@elevenlabs/elevenlabs-js";

async function createHighQualityVoice(name: string, samplePaths: string[]) {
  const client = new ElevenLabsClient({
    apiKey: process.env.ELEVENLABS_API_KEY
  });

  try {
    // Validate samples before uploading
    const samples = samplePaths.map(path => {
      const stats = fs.statSync(path);
      if (stats.size > 10 * 1024 * 1024) { // 10MB limit
        throw new Error(`Sample ${path} exceeds 10MB limit`);
      }
      return createReadStream(path);
    });

    if (samples.length < 1 || samples.length > 25) {
      throw new Error("Need 1-25 samples for voice cloning");
    }

    // Create voice with noise removal for better quality
    const voice = await client.voices.ivc.create({
      name,
      files: samples,
      removeBackgroundNoise: true, // Only if samples have background noise
      description: `High-quality voice clone created programmatically`,
      labels: JSON.stringify({
        created_via: "sdk",
        sample_count: samples.length,
        created_at: new Date().toISOString()
      })
    });

    // Test the voice with a sample generation
    const testAudio = await client.textToSpeech.convert(voice.voiceId, {
      text: "This is a test of the cloned voice quality.",
      modelId: "eleven_multilingual_v2"
    });

    // Verify audio was generated
    const reader = testAudio.getReader();
    const { done, value } = await reader.read();
    if (done || !value || value.length === 0) {
      throw new Error("Voice test failed - no audio generated");
    }

    console.log(`Voice ${voice.voiceId} created and tested successfully`);
    return voice;

  } catch (error) {
    if (error instanceof ElevenLabsError) {
      if (error.statusCode === 400) {
        console.error("Invalid sample format or quality");
      } else if (error.statusCode === 413) {
        console.error("Sample files too large");
      }
    }
    throw error;
  }
}
```

### Streaming with Progress Tracking

```typescript
async function streamWithProgress(voiceId: string, text: string) {
  const stream = await client.textToSpeech.stream(voiceId, {
    text,
    modelId: "eleven_flash_v2_5",
    optimizeStreamingLatency: 3 // Maximum latency optimization
  });

  let totalBytes = 0;
  const chunks: Uint8Array[] = [];

  const reader = stream.getReader();
  while (true) {
    const { done, value } = await reader.read();
    if (done) break;

    chunks.push(value);
    totalBytes += value.length;
    console.log(`Received ${totalBytes} bytes so far...`);
  }

  const audio = Buffer.concat(chunks);
  console.log(`Complete audio: ${audio.length} bytes`);
  return audio;
}
```

## Error Handling

```typescript { .api }
class ElevenLabsError extends Error {
  readonly statusCode?: number;
  readonly body?: unknown;
  readonly rawResponse?: RawResponse;

  constructor(options: {
    message?: string;
    statusCode?: number;
    body?: unknown;
    rawResponse?: RawResponse;
  });
}

class ElevenLabsTimeoutError extends ElevenLabsError {
  constructor(message: string);
}

interface RawResponse {
  statusCode: number;
  headers: Record<string, string>;
}
```

The SDK throws `ElevenLabsError` instances for API errors. While the SDK internally uses specific error types for different HTTP status codes (BadRequestError, UnauthorizedError, ForbiddenError, NotFoundError, UnprocessableEntityError), these are not exported from the package. All API errors should be caught as `ElevenLabsError` instances and the error message or status code examined to determine the specific error type.

The SDK automatically retries failed requests (up to 2 times by default) for:
- Timeout errors (408)
- Conflict errors (409)
- Rate limit errors (429)
- Server errors (5XX)

### Error Handling Examples

```typescript
import { ElevenLabsClient, ElevenLabsError, ElevenLabsTimeoutError } from "@elevenlabs/elevenlabs-js";

// Basic error handling
try {
  const audio = await client.textToSpeech.convert("voiceId", {
    text: "Hello world",
    modelId: "eleven_multilingual_v2"
  });
} catch (error) {
  if (error instanceof ElevenLabsError) {
    console.error(`API error ${error.statusCode}: ${error.message}`);

    // Handle specific status codes
    if (error.statusCode === 401) {
      console.error("Invalid API key");
    } else if (error.statusCode === 429) {
      console.error("Rate limit exceeded - automatic retries were already exhausted");
    } else if (error.statusCode === 422) {
      console.error("Validation error:", error.body);
    }
  } else {
    // Non-API errors (network, etc.)
    console.error("Unexpected error:", error);
  }
}

// Timeout handling
try {
  const audio = await client.textToSpeech.convert("voiceId", {
    text: "Very long text...",
  }, {
    timeoutInSeconds: 30
  });
} catch (error) {
  if (error instanceof ElevenLabsTimeoutError) {
    console.error("Request timed out - consider using streaming for long text");
  }
}

// Retry configuration for critical operations
try {
  const voice = await client.voices.get("voiceId", {
    maxRetries: 5, // Increase retries for critical operations
    timeoutInSeconds: 120
  });
} catch (error) {
  if (error instanceof ElevenLabsError) {
    // Log full error details for debugging
    console.error("Full error details:", {
      statusCode: error.statusCode,
      message: error.message,
      body: error.body,
      headers: error.rawResponse?.headers
    });
  }
}
```

## Utility Functions

```typescript { .api }
function play(audio: AsyncIterable<Uint8Array>): Promise<void>; // Requires ffplay
function stream(audio: ReadableStream<Uint8Array>): Promise<void>; // Requires mpv
```

## Common Types

```typescript { .api }
interface VoiceSettings {
  stability?: number; // 0-1, voice consistency
  similarityBoost?: number; // 0-1, voice similarity to original
  style?: number; // 0-1, exaggeration level
  useSpeakerBoost?: boolean; // Boost speaker similarity (increases latency)
  speed?: number; // 1.0 = default, <1 = slower, >1 = faster
}

interface PronunciationDictionaryLocator {
  pronunciationDictionaryId: string;
  versionId?: string; // Latest version if omitted
}

// Model information
interface Model {
  modelId: string;
  name: string;
  canBeFinetuned: boolean;
  canDoTextToSpeech: boolean;
  canDoVoiceConversion: boolean;
  canUseStyle: boolean;
  canUseSpeakerBoost: boolean;
  servesProVoices: boolean;
  tokenCostFactor: number;
  description?: string;
  requiresAlphaAccess?: boolean;
  maxCharactersRequestFreeUser?: number;
  maxCharactersRequestSubscribedUser?: number;
  maximumTextLengthPerRequest?: number;
  languages?: Language[];
}

interface Language {
  languageId: string;
  name: string;
}

// File upload type
type Uploadable = File | Blob | ReadableStream;

// Output format options
type OutputFormat =
  | "mp3_22050_32" | "mp3_24000_48" | "mp3_44100_32" | "mp3_44100_64"
  | "mp3_44100_96" | "mp3_44100_128" | "mp3_44100_192" // 192 requires Creator+
  | "pcm_8000" | "pcm_16000" | "pcm_22050" | "pcm_24000"
  | "pcm_32000" | "pcm_44100" | "pcm_48000" // 44.1kHz+ requires Pro+
  | "ulaw_8000" // For Twilio
  | "alaw_8000"
  | "opus_48000_32" | "opus_48000_64" | "opus_48000_96"
  | "opus_48000_128" | "opus_48000_192";
```

## Feature Documentation

- [Text-to-Speech](./text-to-speech.md) - TTS conversion, streaming, dialogue, sound effects
- [Voices](./voices.md) - Voice management, cloning, design, samples
- [Conversational AI](./conversational-ai.md) - Agents, knowledge bases, conversations
- [Realtime](./realtime.md) - WebSocket connections for real-time TTS and transcription
- [Music](./music.md) - Music generation, composition plans, stem separation
- [Dubbing](./dubbing.md) - Audio/video dubbing, translation
- [Transcription](./transcription.md) - Speech-to-text, diarization
- [Audio Processing](./audio-processing.md) - Isolation, alignment, speech-to-speech
- [Workspace](./workspace.md) - User, workspace, usage, webhooks, service accounts
- [Studio](./studio.md) - Long-form audio projects, podcasts

## Runtime Support

Node.js 18+, Vercel Edge, Cloudflare Workers, Deno v1.25+, Bun 1.0+

## Dependencies

- `node-fetch` - HTTP client (Node.js only)
- `ws` - WebSocket client
- `command-exists` - System command detection