Official Node.js SDK for ElevenLabs text-to-speech API with voice synthesis, real-time transcription, music generation, and conversational AI
Overall score: 86%
Evaluation — 86%
↑ 1.06× agent success when using this tile
Transform audio from one voice to another while maintaining timing, emotion, and delivery. Perfect for voice conversion, accent transformation, and creating consistent voice performances across different recordings.
import { ElevenLabsClient } from "@elevenlabs/elevenlabs-js";
const client = new ElevenLabsClient({ apiKey: "your-api-key" });
// Access this API via: client.speechToSpeech

Transform audio input to a different voice while preserving prosody and emotional delivery.
/**
* Convert input audio to the target voice while preserving the source
* recording's timing, emotion, and delivery (speech-to-speech).
*
* NOTE(review): this is a reference signature from the docs, not
* executable TypeScript — the inline parameter types are illustrative.
*
* @param voice_id - Target voice ID
* @param request - Audio file and conversion settings (see
*   BodySpeechToSpeechV1SpeechToSpeechVoiceIdPost)
* @param requestOptions - Optional request configuration
* @returns ReadableStream of converted audio bytes
* @throws UnprocessableEntityError if request fails
*/
client.speechToSpeech.convert(
voice_id: string,
request: BodySpeechToSpeechV1SpeechToSpeechVoiceIdPost,
requestOptions?: RequestOptions
): HttpResponsePromise<ReadableStream<Uint8Array>>;
/**
* Request body for client.speechToSpeech.convert.
*
* NOTE(review): field naming is inconsistent — most fields are snake_case
* (model_id, voice_settings, ...) while the last three are camelCase
* (enableLogging, optimizeStreamingLatency, outputFormat). The streaming
* request body below spells the same options in snake_case. Verify the
* correct spellings against the installed SDK version.
*/
interface BodySpeechToSpeechV1SpeechToSpeechVoiceIdPost {
/** Input audio file to convert */
audio: File | Blob;
/** Model ID to use (e.g. "eleven_multilingual_sts_v2") */
model_id?: string;
/** Per-request voice settings override */
voice_settings?: VoiceSettings;
/** Seed for reproducible output */
seed?: number;
/** Remove background noise from input before conversion */
remove_background_noise?: boolean;
/** Input file format */
file_format?: FileFormat;
/** Enable request/response logging */
enableLogging?: boolean;
/** Optimize streaming latency (0-4) */
optimizeStreamingLatency?: number;
/** Output format identifier (e.g. "mp3_44100_128") */
outputFormat?: string;
}
/**
* Per-request voice rendering settings; all numeric fields are 0.0-1.0.
*/
interface VoiceSettings {
/** Stability (0.0 to 1.0) */
stability?: number;
/** Similarity boost (0.0 to 1.0) */
similarity_boost?: number;
/** Style exaggeration (0.0 to 1.0) */
style?: number;
/** Use speaker boost */
use_speaker_boost?: boolean;
}
/**
* Accepted input audio formats (see file_format on the request bodies).
*/
enum FileFormat {
MP3 = "mp3",
MP4 = "mp4",
MPEG = "mpeg",
MPGA = "mpga",
M4A = "m4a",
WAV = "wav",
WEBM = "webm",
}Stream voice conversion with low latency.
/**
* Streaming speech-to-speech conversion: converted audio chunks are
* delivered as they are produced, for lower time-to-first-audio.
*
* NOTE(review): reference signature from the docs, not executable
* TypeScript.
*
* @param voice_id - Target voice ID
* @param request - Audio and streaming settings (see
*   BodySpeechToSpeechStreamingV1SpeechToSpeechVoiceIdStreamPost)
* @param requestOptions - Optional request configuration
* @returns ReadableStream of converted audio chunks
* @throws UnprocessableEntityError if request fails
*/
client.speechToSpeech.stream(
voice_id: string,
request: BodySpeechToSpeechStreamingV1SpeechToSpeechVoiceIdStreamPost,
requestOptions?: RequestOptions
): HttpResponsePromise<ReadableStream<Uint8Array>>;
/**
* Request body for client.speechToSpeech.stream.
*
* Mirrors the convert request body, but the streaming/output options here
* are snake_case (optimize_streaming_latency, output_format), unlike the
* camelCase spellings used in the convert body.
*/
interface BodySpeechToSpeechStreamingV1SpeechToSpeechVoiceIdStreamPost {
/** Input audio file to convert */
audio: File | Blob;
/** Model ID */
model_id?: string;
/** Per-request voice settings override */
voice_settings?: VoiceSettings;
/** Seed for reproducible output */
seed?: number;
/** Remove background noise from input before conversion */
remove_background_noise?: boolean;
/** Input file format */
file_format?: FileFormat;
/** Optimize streaming latency (0-4) */
optimize_streaming_latency?: number;
/** Output format identifier */
output_format?: string;
}import { ElevenLabsClient } from "@elevenlabs/elevenlabs-js";
// Basic conversion example: read a local file, convert it to the target
// voice, and write the result back to disk.
// NOTE(review): top-level await requires an ES module context, and the
// global File constructor requires a recent Node runtime (or a polyfill)
// — verify the target environment.
import { readFile, writeFile } from "fs/promises";
const client = new ElevenLabsClient({ apiKey: "your-api-key" });
// Load input audio
const inputAudio = await readFile("input.mp3");
// Convert to target voice
const convertedAudio = await client.speechToSpeech.convert(
"target-voice-id",
{
audio: new File([inputAudio], "input.mp3"),
model_id: "eleven_multilingual_sts_v2",
}
);
// Save converted audio: consume the returned stream chunk by chunk, then
// concatenate the chunks into a single buffer.
const chunks: Uint8Array[] = [];
for await (const chunk of convertedAudio) {
chunks.push(chunk);
}
await writeFile("output.mp3", Buffer.concat(chunks));// Fine-tune voice conversion with custom settings
// Override per-request voice settings (see VoiceSettings; numeric fields
// range 0.0-1.0).
const convertedAudio = await client.speechToSpeech.convert(
"target-voice-id",
{
audio: new File([inputAudio], "input.mp3"),
voice_settings: {
stability: 0.7,
similarity_boost: 0.8,
style: 0.5,
use_speaker_boost: true,
},
}
);// Clean up noisy input audio during conversion
// remove_background_noise denoises the input audio before conversion.
// NOTE(review): noisyAudio is not defined in this snippet — load it the
// same way as inputAudio in the basic example.
const convertedAudio = await client.speechToSpeech.convert(
"target-voice-id",
{
audio: new File([noisyAudio], "noisy.mp3"),
remove_background_noise: true,
}
);// Stream conversion for lower latency
// Streaming variant: chunks arrive while conversion is still running.
// optimize_streaming_latency ranges 0-4 (per the request interface).
const audioStream = await client.speechToSpeech.stream(
"target-voice-id",
{
audio: new File([inputAudio], "input.mp3"),
optimize_streaming_latency: 3,
}
);
// Process chunks as they arrive
for await (const chunk of audioStream) {
// Play or process chunk immediately
console.log("Received chunk:", chunk.length, "bytes");
}// Convert with specific output format
// Convert a WAV input while requesting MP3 output.
// NOTE(review): file_format is typed as the FileFormat enum in the request
// interface, so the bare string "wav" may not type-check — verify whether
// the SDK accepts plain strings or requires FileFormat.WAV.
// NOTE(review): outputFormat is camelCase here while the streaming body
// uses output_format — confirm the correct spelling for this endpoint.
const convertedAudio = await client.speechToSpeech.convert(
"target-voice-id",
{
audio: new File([inputAudio], "input.wav"),
file_format: "wav",
outputFormat: "mp3_44100_128", // 44.1kHz, 128kbps MP3
}
);// Use seed for consistent results
// Reproducibility: the same seed with identical input and options yields
// matching output (see the seed field on the request body).
const seed = 12345;
const audio1 = await client.speechToSpeech.convert("voice-id", {
audio: new File([inputAudio], "input.mp3"),
seed,
});
const audio2 = await client.speechToSpeech.convert("voice-id", {
audio: new File([inputAudio], "input.mp3"),
seed,
});
// audio1 and audio2 will be identical
import { readdir } from "fs/promises";
import { join } from "path";
// Convert multiple files to same voice
/**
 * Batch-converts every .mp3 file in inputDir to the given voice, writing
 * each result under the same file name in outputDir. Files are processed
 * sequentially, with progress logged per file.
 */
async function convertDirectory(
  inputDir: string,
  outputDir: string,
  voiceId: string
): Promise<void> {
  const entries = await readdir(inputDir);
  const mp3Names = entries.filter((name) => name.endsWith(".mp3"));
  for (const file of mp3Names) {
    console.log(`Converting ${file}...`);
    // Read the source bytes and run the speech-to-speech conversion.
    const sourceBytes = await readFile(join(inputDir, file));
    const converted = await client.speechToSpeech.convert(voiceId, {
      audio: new File([sourceBytes], file),
    });
    // Drain the result stream, then flush it to disk in one write.
    const pieces: Uint8Array[] = [];
    for await (const piece of converted) {
      pieces.push(piece);
    }
    await writeFile(join(outputDir, file), Buffer.concat(pieces));
    console.log(`Saved ${file}`);
  }
}
// Run the batch conversion defined above.
await convertDirectory("./inputs", "./outputs", "voice-id");// Transform accent while keeping voice characteristics
// Accent transformation: convert to a voice with a different accent while
// a high similarity_boost preserves the original voice characteristics.
// NOTE(review): britishVoice is declared but never used, and britishAudio
// is used below without being defined in this snippet — verify both when
// adapting this example.
const britishVoice = "british-voice-id";
const americanVoice = "american-voice-id";
// Convert British accent to American
const convertedAudio = await client.speechToSpeech.convert(
americanVoice,
{
audio: new File([britishAudio], "british.mp3"),
voice_settings: {
similarity_boost: 0.9, // Keep original voice characteristics
stability: 0.8,
},
}
);// Ensure consistent voice across multiple recording takes
// Convert multiple recording takes to one target voice, using a fixed
// seed plus maximal stability/similarity so the takes sound consistent.
const targetVoice = "consistent-voice-id";
const takes = ["take1.mp3", "take2.mp3", "take3.mp3"];
for (const take of takes) {
const audio = await readFile(take);
const converted = await client.speechToSpeech.convert(targetVoice, {
audio: new File([audio], take),
seed: 42, // Same seed for consistency
voice_settings: {
stability: 1.0, // Maximum stability
similarity_boost: 1.0,
},
});
// Save converted take
const chunks = [];
for await (const chunk of converted) {
chunks.push(chunk);
}
await writeFile(`converted_${take}`, Buffer.concat(chunks));
}import { createReadStream } from "fs";
// Stream conversion for real-time processing
/**
 * Streams a speech-to-speech conversion of the file at inputPath into the
 * given voice, handing each converted chunk to processAudioChunk as soon
 * as it arrives. Requests "pcm_16000" output at the highest streaming
 * latency optimization level (4).
 */
async function streamVoiceTransformation(
  inputPath: string,
  voiceId: string
): Promise<void> {
  const sourceBytes = await readFile(inputPath);
  const converted = await client.speechToSpeech.stream(voiceId, {
    audio: new File([sourceBytes], inputPath),
    optimize_streaming_latency: 4, // most aggressive latency optimization
    output_format: "pcm_16000",
  });
  // Forward every chunk downstream the moment it is produced
  // (audio player, WebSocket, etc.).
  for await (const piece of converted) {
    processAudioChunk(piece);
  }
}
// Placeholder consumer for streamed audio chunks — replace with real
// playback or forwarding logic.
function processAudioChunk(chunk: Uint8Array): void {
// Handle real-time audio processing
console.log("Processing chunk:", chunk.length);
}try {
const converted = await client.speechToSpeech.convert("voice-id", {
audio: new File([inputAudio], "input.mp3"),
});
// Process converted audio
} catch (error) {
// Narrow the caught value before relying on typed fields.
// NOTE(review): the ElevenLabs namespace is not imported in this snippet
// — verify the actual import path for UnprocessableEntityError in the SDK.
if (error instanceof ElevenLabs.UnprocessableEntityError) {
console.error("Validation error:", error.message);
} else {
console.error("Conversion failed:", error);
}
}