# Realtime Speech-to-Text

**Node.js Only**: Uses WebSocket (`ws`) and `child_process`. Not compatible with browsers, Deno, or Cloudflare Workers.

## Access

```typescript
import { ElevenLabsClient, AudioFormat, CommitStrategy, RealtimeEvents } from "@elevenlabs/elevenlabs-js";

const client = new ElevenLabsClient({ apiKey: process.env.ELEVENLABS_API_KEY });
const connection = await client.speechToText.realtime.connect(options);
```

## Connection Methods

### speechToText.realtime.connect()

```typescript { .api }
connect(
  options: AudioOptions | UrlOptions
): Promise<RealtimeConnection>

interface AudioOptions {
  audioFormat: AudioFormat;
  sampleRate: number;
  modelId: string;
  commitStrategy?: CommitStrategy;  // Default: MANUAL
  vadSilenceThresholdSecs?: number; // >0.3 and ≤3.0
  vadThreshold?: number;            // 0.1-0.9
  minSpeechDurationMs?: number;     // >50 and ≤2000
  minSilenceDurationMs?: number;    // >50 and ≤2000
  languageCode?: string;            // ISO-639-1 or ISO-639-3
  includeTimestamps?: boolean;      // Default: false
}

interface UrlOptions {
  url: string;                      // Requires ffmpeg in PATH
  modelId: string;
  commitStrategy?: CommitStrategy;
  vadSilenceThresholdSecs?: number;
  vadThreshold?: number;
  minSpeechDurationMs?: number;
  minSilenceDurationMs?: number;
  languageCode?: string;
  includeTimestamps?: boolean;
}

enum AudioFormat {
  PCM_8000 = "pcm_8000",
  PCM_16000 = "pcm_16000",
  PCM_22050 = "pcm_22050",
  PCM_24000 = "pcm_24000",
  PCM_44100 = "pcm_44100",
  PCM_48000 = "pcm_48000",
  ULAW_8000 = "ulaw_8000"
}

enum CommitStrategy {
  MANUAL = "manual", // Call connection.commit() to finalize
  VAD = "vad"        // Automatic commits on speech detection
}
```

## RealtimeConnection API

```typescript { .api }
class RealtimeConnection {
  on(event: RealtimeEvents, listener: (...args: unknown[]) => void): void;
  off(event: RealtimeEvents, listener: (...args: unknown[]) => void): void;

  send(data: {
    audioBase64: string;
    commit?: boolean;
    sampleRate?: number;
  }): void;

  commit(): void; // Manual commit only
  close(): void;
}

enum RealtimeEvents {
  SESSION_STARTED = "session_started",
  PARTIAL_TRANSCRIPT = "partial_transcript",
  COMMITTED_TRANSCRIPT = "committed_transcript",
  COMMITTED_TRANSCRIPT_WITH_TIMESTAMPS = "committed_transcript_with_timestamps",
  ERROR = "error",
  AUTH_ERROR = "auth_error",
  QUOTA_EXCEEDED = "quota_exceeded",
  OPEN = "open",
  CLOSE = "close"
}
```

## Usage Examples

### Manual Audio Streaming

```typescript
import { ElevenLabsClient, AudioFormat, CommitStrategy, RealtimeEvents } from "@elevenlabs/elevenlabs-js";

const client = new ElevenLabsClient({ apiKey: process.env.ELEVENLABS_API_KEY });

const conn = await client.speechToText.realtime.connect({
  modelId: "scribe_v2_realtime",
  audioFormat: AudioFormat.PCM_16000,
  sampleRate: 16000,
  commitStrategy: CommitStrategy.MANUAL
});

conn.on(RealtimeEvents.SESSION_STARTED, (data) => {
  console.log("Session:", data.session_id);
});

conn.on(RealtimeEvents.PARTIAL_TRANSCRIPT, (data) => {
  console.log("Partial:", data.text);
});

conn.on(RealtimeEvents.COMMITTED_TRANSCRIPT, (data) => {
  console.log("Final:", data.text);
});

// Send audio chunks
conn.send({ audioBase64: base64Audio });

// Finalize
conn.commit();
conn.close();
```

### URL Streaming (requires ffmpeg)

```typescript
const conn = await client.speechToText.realtime.connect({
  modelId: "scribe_v2_realtime",
  url: "https://example.com/audio.mp3",
  commitStrategy: CommitStrategy.VAD
});

conn.on(RealtimeEvents.COMMITTED_TRANSCRIPT, (data) => {
  console.log("Transcript:", data.text);
});
```

### With VAD Configuration

```typescript
const conn = await client.speechToText.realtime.connect({
  modelId: "scribe_v2_realtime",
  audioFormat: AudioFormat.PCM_16000,
  sampleRate: 16000,
  commitStrategy: CommitStrategy.VAD,
  vadSilenceThresholdSecs: 0.5,
  vadThreshold: 0.5,
  minSpeechDurationMs: 100,
  minSilenceDurationMs: 200
});
```

### With Timestamps

```typescript
const conn = await client.speechToText.realtime.connect({
  modelId: "scribe_v2_realtime",
  audioFormat: AudioFormat.PCM_16000,
  sampleRate: 16000,
  includeTimestamps: true,
  languageCode: "en"
});

conn.on(RealtimeEvents.COMMITTED_TRANSCRIPT_WITH_TIMESTAMPS, (data) => {
  console.log("Text:", data.text);
  data.words?.forEach(word => {
    console.log(`"${word.text}": ${word.start_time}s - ${word.end_time}s`);
  });
});
```

## Error Handling

```typescript
conn.on(RealtimeEvents.ERROR, (error) => {
  console.error("Error:", error.message);
});

conn.on(RealtimeEvents.AUTH_ERROR, (error) => {
  console.error("Auth error:", error);
});

conn.on(RealtimeEvents.QUOTA_EXCEEDED, (error) => {
  console.error("Quota exceeded");
});
```

## Edge Cases and Important Notes

### Platform Requirements

- **Node.js only**: Uses the `ws` package and `child_process` - not compatible with browsers, Deno, or Cloudflare Workers
- **URL streaming**: Requires `ffmpeg` installed and available in PATH
- **Connection reuse**: Cannot reuse a connection after `close()` - create a new connection for each session
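Because URL streaming fails without ffmpeg on the PATH, it can be worth probing for it before connecting. A minimal sketch; the `commandAvailable` helper is ours, not part of the SDK:

```typescript
import { spawnSync } from "child_process";

// Returns true if `cmd` can be spawned from PATH (e.g. "ffmpeg").
// Hypothetical helper -- only spawn failure (ENOENT) counts as missing,
// not a nonzero exit status.
function commandAvailable(cmd: string): boolean {
  const result = spawnSync(cmd, ["-version"], { stdio: "ignore" });
  return result.error === undefined;
}

if (!commandAvailable("ffmpeg")) {
  console.warn("ffmpeg not found in PATH; URL streaming will fail. Send audio with AudioOptions instead.");
}
```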

### VAD Configuration Constraints

- `vadSilenceThresholdSecs`: Must be >0.3 and ≤3.0
- `vadThreshold`: Range 0.1-0.9; lower values are more sensitive
- `minSpeechDurationMs`: Must be >50 and ≤2000
- `minSilenceDurationMs`: Must be >50 and ≤2000
- VAD parameters are interdependent - adjust them together for best results
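Out-of-range values are rejected server-side; validating locally first gives clearer error messages. A minimal sketch mirroring the constraints above (the `validateVadOptions` helper is ours, not part of the SDK):

```typescript
// Hypothetical client-side check of the documented VAD ranges.
// The server performs its own validation regardless.
function validateVadOptions(opts: {
  vadSilenceThresholdSecs?: number;
  vadThreshold?: number;
  minSpeechDurationMs?: number;
  minSilenceDurationMs?: number;
}): string[] {
  const errors: string[] = [];
  const secs = opts.vadSilenceThresholdSecs;
  if (secs !== undefined && !(secs > 0.3 && secs <= 3.0)) {
    errors.push("vadSilenceThresholdSecs must be >0.3 and <=3.0");
  }
  const thr = opts.vadThreshold;
  if (thr !== undefined && !(thr >= 0.1 && thr <= 0.9)) {
    errors.push("vadThreshold must be between 0.1 and 0.9");
  }
  for (const key of ["minSpeechDurationMs", "minSilenceDurationMs"] as const) {
    const v = opts[key];
    if (v !== undefined && !(v > 50 && v <= 2000)) {
      errors.push(`${key} must be >50 and <=2000`);
    }
  }
  return errors;
}
```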

### Commit Strategy Behavior

- **Manual commit**: Must call `commit()` explicitly when using `CommitStrategy.MANUAL`
- **VAD commit**: Commits automatically based on voice activity detection
- Manual commit gives precise control but requires managing commit timing
- VAD commit is easier but may commit at unexpected times

### Audio Format Requirements

- Sample rate must match `audioFormat` (e.g., `PCM_16000` requires 16000 Hz)
- Audio data must be base64-encoded before sending
- For URL streaming, ffmpeg automatically converts the source to the required format
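In Node.js, base64-encoding raw PCM is a `Buffer` round-trip. A minimal sketch, assuming 16-bit little-endian mono samples; the helper name is ours, not part of the SDK:

```typescript
// Encode 16-bit LE mono PCM samples as base64 for connection.send().
// Illustrative helper -- not part of the SDK.
function pcm16ToBase64(samples: Int16Array): string {
  return Buffer
    .from(samples.buffer, samples.byteOffset, samples.byteLength)
    .toString("base64");
}

// 100 ms of 16 kHz audio is 1600 samples (3200 bytes).
const chunk = new Int16Array(1600);
const audioBase64 = pcm16ToBase64(chunk);
// conn.send({ audioBase64 });
```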

### Error Handling

- Always listen for `ERROR`, `AUTH_ERROR`, and `QUOTA_EXCEEDED` events
- The connection may close unexpectedly - handle the `CLOSE` event
- WebSocket errors are separate from API errors

### Message Types

- Internal message types (`Config`, `InputAudioChunk`, `WordsItem`, etc.) are not exported from the package root
- Access message data through event callbacks only

### Error Scenarios

```typescript
import { ElevenLabsClient, AudioFormat, RealtimeEvents } from "@elevenlabs/elevenlabs-js";
import type { AudioOptions, UrlOptions } from "@elevenlabs/elevenlabs-js";

// Handle connection errors
async function connectWithRetry(options: AudioOptions | UrlOptions, maxRetries = 3) {
  for (let attempt = 0; attempt < maxRetries; attempt++) {
    try {
      const connection = await client.speechToText.realtime.connect(options);

      // Set up error handlers immediately
      connection.on(RealtimeEvents.ERROR, (error) => {
        console.error("Transcription error:", error);
      });

      connection.on(RealtimeEvents.AUTH_ERROR, (error) => {
        console.error("Authentication error:", error);
        connection.close();
      });

      connection.on(RealtimeEvents.QUOTA_EXCEEDED, (error) => {
        console.error("Quota exceeded:", error);
        connection.close();
      });

      return connection;

    } catch (error) {
      if (attempt < maxRetries - 1) {
        console.log(`Connection attempt ${attempt + 1} failed, retrying...`);
        await new Promise(resolve => setTimeout(resolve, 1000 * (attempt + 1)));
        continue;
      }
      throw error;
    }
  }
}

// Handle invalid audio format
try {
  const connection = await client.speechToText.realtime.connect({
    modelId: "scribe_v2_realtime",
    audioFormat: AudioFormat.PCM_16000,
    sampleRate: 22050, // Mismatch!
  });
} catch (error) {
  console.error("Invalid audio configuration:", error);
  // Fix: match sampleRate to audioFormat
}

// Handle URL streaming without ffmpeg
try {
  const connection = await client.speechToText.realtime.connect({
    modelId: "scribe_v2_realtime",
    url: "https://example.com/audio.mp3"
  });
} catch (error) {
  if (error.message?.includes("ffmpeg")) {
    console.error("ffmpeg not found - install ffmpeg or use AudioOptions instead");
  }
}
```

## Comprehensive Examples

### Production-Ready Realtime Transcription

```typescript
import {
  ElevenLabsClient,
  AudioFormat,
  CommitStrategy,
  RealtimeEvents,
  RealtimeConnection
} from "@elevenlabs/elevenlabs-js";
import type { AudioOptions, UrlOptions } from "@elevenlabs/elevenlabs-js";
import { EventEmitter } from "events";

class TranscriptionSession extends EventEmitter {
  private connection: RealtimeConnection | null = null;
  private isActive = false;
  private transcripts: string[] = [];

  constructor(private client: ElevenLabsClient) {
    super();
  }

  async start(options: AudioOptions | UrlOptions) {
    if (this.isActive) {
      throw new Error("Session already active");
    }

    try {
      this.connection = await this.client.speechToText.realtime.connect(options);
      this.isActive = true;

      // Set up all event handlers
      this.setupEventHandlers();

      this.emit("started");

    } catch (error) {
      this.isActive = false;
      this.emit("error", error);
      throw error;
    }
  }

  private setupEventHandlers() {
    if (!this.connection) return;

    this.connection.on(RealtimeEvents.SESSION_STARTED, (data) => {
      console.log("Session started:", data.session_id);
      this.emit("sessionStarted", data);
    });

    this.connection.on(RealtimeEvents.PARTIAL_TRANSCRIPT, (data) => {
      this.emit("partialTranscript", data.text);
    });

    this.connection.on(RealtimeEvents.COMMITTED_TRANSCRIPT, (data) => {
      this.transcripts.push(data.text);
      this.emit("committedTranscript", data.text);
    });

    this.connection.on(RealtimeEvents.COMMITTED_TRANSCRIPT_WITH_TIMESTAMPS, (data) => {
      this.transcripts.push(data.text);
      this.emit("committedTranscriptWithTimestamps", {
        text: data.text,
        words: data.words
      });
    });

    this.connection.on(RealtimeEvents.ERROR, (error) => {
      console.error("Transcription error:", error);
      this.emit("error", error);
    });

    this.connection.on(RealtimeEvents.AUTH_ERROR, (error) => {
      console.error("Auth error:", error);
      this.emit("authError", error);
      this.stop();
    });

    this.connection.on(RealtimeEvents.QUOTA_EXCEEDED, (error) => {
      console.error("Quota exceeded:", error);
      this.emit("quotaExceeded", error);
      this.stop();
    });

    this.connection.on(RealtimeEvents.CLOSE, () => {
      this.isActive = false;
      this.emit("closed");
    });
  }

  sendAudio(audioBase64: string, commit = false) {
    if (!this.connection || !this.isActive) {
      throw new Error("Session not active");
    }

    try {
      this.connection.send({ audioBase64, commit });
    } catch (error) {
      this.emit("error", error);
      throw error;
    }
  }

  commit() {
    if (!this.connection || !this.isActive) {
      throw new Error("Session not active");
    }

    this.connection.commit();
  }

  stop() {
    if (this.connection && this.isActive) {
      this.connection.close();
      this.connection = null;
      this.isActive = false;
    }
  }

  getTranscripts(): string[] {
    return [...this.transcripts];
  }

  getFullTranscript(): string {
    return this.transcripts.join(" ");
  }
}

// Usage
const client = new ElevenLabsClient({ apiKey: process.env.ELEVENLABS_API_KEY });
const session = new TranscriptionSession(client);

session.on("committedTranscript", (text) => {
  console.log("Final:", text);
});

session.on("error", (error) => {
  console.error("Session error:", error);
});

await session.start({
  modelId: "scribe_v2_realtime",
  audioFormat: AudioFormat.PCM_16000,
  sampleRate: 16000,
  commitStrategy: CommitStrategy.VAD,
  includeTimestamps: true
});

// Send audio chunks
session.sendAudio(base64AudioChunk1);
session.sendAudio(base64AudioChunk2);
session.commit(); // If using MANUAL strategy

// Later
session.stop();
console.log("Full transcript:", session.getFullTranscript());
```

### Audio Stream Processing with Buffering

```typescript
import { Readable } from "stream";

async function transcribeAudioStream(audioStream: Readable) {
  const connection = await client.speechToText.realtime.connect({
    modelId: "scribe_v2_realtime",
    audioFormat: AudioFormat.PCM_16000,
    sampleRate: 16000,
    commitStrategy: CommitStrategy.VAD,
    vadSilenceThresholdSecs: 0.5,
    vadThreshold: 0.5
  });

  const transcripts: string[] = [];

  connection.on(RealtimeEvents.COMMITTED_TRANSCRIPT, (data) => {
    transcripts.push(data.text);
    console.log("Transcript:", data.text);
  });

  // Buffer incoming audio and send in ~100ms chunks
  let buffered: Buffer[] = [];
  let bufferedBytes = 0;
  const chunkBytes = 3200; // 100ms of 16-bit mono PCM at 16kHz (1600 samples)

  audioStream.on("data", (chunk: Buffer) => {
    buffered.push(chunk);
    bufferedBytes += chunk.length;

    // Send once enough bytes have accumulated
    if (bufferedBytes >= chunkBytes) {
      const audioData = Buffer.concat(buffered);
      buffered = [];
      bufferedBytes = 0;
      connection.send({ audioBase64: audioData.toString("base64") });
    }
  });

  audioStream.on("end", () => {
    // Send any remaining audio and ask the server to commit it
    if (bufferedBytes > 0) {
      connection.send({
        audioBase64: Buffer.concat(buffered).toString("base64"),
        commit: true
      });
    }

    // Give the final transcript time to arrive, then close
    setTimeout(() => connection.close(), 1000);
  });

  audioStream.on("error", (error) => {
    console.error("Stream error:", error);
    connection.close();
  });

  return new Promise<string[]>((resolve) => {
    connection.on(RealtimeEvents.CLOSE, () => {
      resolve(transcripts);
    });
  });
}
```

### URL Streaming with Progress Tracking

```typescript
async function transcribeUrlWithProgress(url: string) {
  const connection = await client.speechToText.realtime.connect({
    modelId: "scribe_v2_realtime",
    url,
    commitStrategy: CommitStrategy.VAD,
    includeTimestamps: true
  });

  const transcripts: Array<{ text: string; timestamp: number }> = [];
  let startTime = Date.now();

  connection.on(RealtimeEvents.SESSION_STARTED, () => {
    startTime = Date.now();
    console.log("Transcription started");
  });

  connection.on(RealtimeEvents.COMMITTED_TRANSCRIPT_WITH_TIMESTAMPS, (data) => {
    const elapsed = (Date.now() - startTime) / 1000;
    transcripts.push({
      text: data.text,
      timestamp: elapsed
    });

    console.log(`[${elapsed.toFixed(2)}s] ${data.text}`);

    if (data.words) {
      data.words.forEach(word => {
        console.log(`  "${word.text}": ${word.start_time}s - ${word.end_time}s`);
      });
    }
  });

  connection.on(RealtimeEvents.CLOSE, () => {
    console.log(`Transcription complete. Total time: ${(Date.now() - startTime) / 1000}s`);
    console.log(`Total transcripts: ${transcripts.length}`);
  });

  connection.on(RealtimeEvents.ERROR, (error) => {
    console.error("Error during transcription:", error);
  });

  // The connection closes automatically when the URL stream completes
  return new Promise<typeof transcripts>((resolve, reject) => {
    connection.on(RealtimeEvents.CLOSE, () => {
      resolve(transcripts);
    });

    connection.on(RealtimeEvents.ERROR, (error) => {
      reject(error);
    });
  });
}
```