Transform audio from one voice to another while preserving timing, emotion, and delivery. Well suited to accent transformation and to keeping voice performances consistent across different recordings.
import { ElevenLabsClient } from "@elevenlabs/elevenlabs-js";
const client = new ElevenLabsClient({ apiKey: "your-api-key" });
// Access this API via: client.speechToSpeech
Transform audio input to a different voice while preserving prosody and emotional delivery.
/**
* @param voice_id - Target voice ID
* @param request - Audio file and conversion settings
* @param requestOptions - Optional request configuration
* @returns ReadableStream of converted audio
* @throws UnprocessableEntityError if request fails
*/
client.speechToSpeech.convert(
voice_id: string,
request: BodySpeechToSpeechV1SpeechToSpeechVoiceIdPost,
requestOptions?: RequestOptions
): HttpResponsePromise<ReadableStream<Uint8Array>>;
interface BodySpeechToSpeechV1SpeechToSpeechVoiceIdPost {
/** Input audio file */
audio: File | Blob;
/** Model ID to use */
model_id?: string;
/** Voice settings override */
voice_settings?: VoiceSettings;
/** Seed for reproducibility */
seed?: number;
/** Remove background noise from input */
remove_background_noise?: boolean;
/** Input file format */
file_format?: FileFormat;
/** Enable request/response logging */
enableLogging?: boolean;
/** Optimize streaming latency (0-4) */
optimizeStreamingLatency?: number;
/** Output format */
outputFormat?: string;
}
interface VoiceSettings {
/** Stability (0.0 to 1.0) */
stability?: number;
/** Similarity boost (0.0 to 1.0) */
similarity_boost?: number;
/** Style exaggeration (0.0 to 1.0) */
style?: number;
/** Use speaker boost */
use_speaker_boost?: boolean;
}
enum FileFormat {
MP3 = "mp3",
MP4 = "mp4",
MPEG = "mpeg",
MPGA = "mpga",
M4A = "m4a",
WAV = "wav",
WEBM = "webm",
}
Stream voice conversion with low latency.
/**
* @param voice_id - Target voice ID
* @param request - Audio and streaming settings
* @param requestOptions - Optional request configuration
* @returns ReadableStream of converted audio chunks
* @throws UnprocessableEntityError if request fails
*/
client.speechToSpeech.stream(
voice_id: string,
request: BodySpeechToSpeechStreamingV1SpeechToSpeechVoiceIdStreamPost,
requestOptions?: RequestOptions
): HttpResponsePromise<ReadableStream<Uint8Array>>;
interface BodySpeechToSpeechStreamingV1SpeechToSpeechVoiceIdStreamPost {
/** Input audio file */
audio: File | Blob;
/** Model ID */
model_id?: string;
/** Voice settings */
voice_settings?: VoiceSettings;
/** Seed for reproducibility */
seed?: number;
/** Remove background noise */
remove_background_noise?: boolean;
/** Input file format */
file_format?: FileFormat;
/** Optimize streaming latency (0-4) */
optimize_streaming_latency?: number;
/** Output format */
output_format?: string;
}
import { ElevenLabsClient } from "@elevenlabs/elevenlabs-js";
import { readFile, writeFile } from "fs/promises";
const client = new ElevenLabsClient({ apiKey: "your-api-key" });
// Load input audio
const inputAudio = await readFile("input.mp3");
// Convert to target voice
const convertedAudio = await client.speechToSpeech.convert(
"target-voice-id",
{
audio: new File([inputAudio], "input.mp3"),
model_id: "eleven_multilingual_sts_v2",
}
);
// Save converted audio
const chunks: Uint8Array[] = [];
for await (const chunk of convertedAudio) {
chunks.push(chunk);
}
await writeFile("output.mp3", Buffer.concat(chunks));
// Fine-tune voice conversion with custom settings
const convertedAudio = await client.speechToSpeech.convert(
"target-voice-id",
{
audio: new File([inputAudio], "input.mp3"),
voice_settings: {
stability: 0.7,
similarity_boost: 0.8,
style: 0.5,
use_speaker_boost: true,
},
}
);
// Clean up noisy input audio during conversion
const convertedAudio = await client.speechToSpeech.convert(
"target-voice-id",
{
audio: new File([noisyAudio], "noisy.mp3"),
remove_background_noise: true,
}
);
// Stream conversion for lower latency
const audioStream = await client.speechToSpeech.stream(
"target-voice-id",
{
audio: new File([inputAudio], "input.mp3"),
optimize_streaming_latency: 3,
}
);
// Process chunks as they arrive
for await (const chunk of audioStream) {
// Play or process chunk immediately
console.log("Received chunk:", chunk.length, "bytes");
}
// Convert with specific output format
const convertedAudio = await client.speechToSpeech.convert(
"target-voice-id",
{
audio: new File([inputAudio], "input.wav"),
file_format: "wav",
outputFormat: "mp3_44100_128", // 44.1kHz, 128kbps MP3
}
);
// Use seed for consistent results
const seed = 12345;
const audio1 = await client.speechToSpeech.convert("voice-id", {
audio: new File([inputAudio], "input.mp3"),
seed,
});
const audio2 = await client.speechToSpeech.convert("voice-id", {
audio: new File([inputAudio], "input.mp3"),
seed,
});
// With the same seed and input, audio1 and audio2 should be effectively identical (seeding is best-effort, not guaranteed)
import { readdir } from "fs/promises";
import { join } from "path";
// Convert multiple files to same voice
async function convertDirectory(
inputDir: string,
outputDir: string,
voiceId: string
): Promise<void> {
const files = await readdir(inputDir);
for (const file of files) {
if (!file.endsWith(".mp3")) continue;
console.log(`Converting ${file}...`);
const inputPath = join(inputDir, file);
const outputPath = join(outputDir, file);
const inputAudio = await readFile(inputPath);
const converted = await client.speechToSpeech.convert(voiceId, {
audio: new File([inputAudio], file),
});
const chunks = [];
for await (const chunk of converted) {
chunks.push(chunk);
}
await writeFile(outputPath, Buffer.concat(chunks));
console.log(`Saved ${file}`);
}
}
await convertDirectory("./inputs", "./outputs", "voice-id");
// Transform accent while keeping voice characteristics
const britishVoice = "british-voice-id";
const americanVoice = "american-voice-id";
// Convert British accent to American
const convertedAudio = await client.speechToSpeech.convert(
americanVoice,
{
audio: new File([britishAudio], "british.mp3"),
voice_settings: {
similarity_boost: 0.9, // Adhere closely to the target voice
stability: 0.8,
},
}
);
// Ensure consistent voice across multiple recording takes
const targetVoice = "consistent-voice-id";
const takes = ["take1.mp3", "take2.mp3", "take3.mp3"];
for (const take of takes) {
const audio = await readFile(take);
const converted = await client.speechToSpeech.convert(targetVoice, {
audio: new File([audio], take),
seed: 42, // Same seed for consistency
voice_settings: {
stability: 1.0, // Maximum stability
similarity_boost: 1.0,
},
});
// Save converted take
const chunks = [];
for await (const chunk of converted) {
chunks.push(chunk);
}
await writeFile(`converted_${take}`, Buffer.concat(chunks));
}
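The save-to-file examples above all repeat the same chunk-collection loop. A small helper can centralize it; this is a hypothetical utility sketch, not part of the SDK:
// Hypothetical helper (not part of the SDK): drain a converted-audio stream into a single Buffer
async function streamToBuffer(stream: ReadableStream<Uint8Array>): Promise<Buffer> {
  const chunks: Uint8Array[] = [];
  for await (const chunk of stream) {
    chunks.push(chunk);
  }
  return Buffer.concat(chunks);
}
// Usage: await writeFile("output.mp3", await streamToBuffer(converted));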
// Stream conversion for real-time processing
async function streamVoiceTransformation(
inputPath: string,
voiceId: string
): Promise<void> {
const inputAudio = await readFile(inputPath);
const stream = await client.speechToSpeech.stream(voiceId, {
audio: new File([inputAudio], inputPath),
optimize_streaming_latency: 4, // Maximum streaming optimization
output_format: "pcm_16000",
});
// Process audio chunks in real-time
for await (const chunk of stream) {
// Send to audio player, WebSocket, etc.
processAudioChunk(chunk);
}
}
function processAudioChunk(chunk: Uint8Array): void {
// Handle real-time audio processing
console.log("Processing chunk:", chunk.length);
}
import { ElevenLabs } from "@elevenlabs/elevenlabs-js";
try {
const converted = await client.speechToSpeech.convert("voice-id", {
audio: new File([inputAudio], "input.mp3"),
});
// Process converted audio
} catch (error) {
if (error instanceof ElevenLabs.UnprocessableEntityError) {
console.error("Validation error:", error.message);
} else {
console.error("Conversion failed:", error);
}
}
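Transient failures such as rate limits or network errors can be retried, while validation errors cannot. A minimal retry sketch; the attempt count and backoff delay are illustrative placeholders, not recommended values:
// Illustrative retry loop: retry transient failures, rethrow validation errors immediately
const maxAttempts = 3;
let converted: ReadableStream<Uint8Array> | undefined;
for (let attempt = 1; attempt <= maxAttempts && !converted; attempt++) {
  try {
    converted = await client.speechToSpeech.convert("voice-id", {
      audio: new File([inputAudio], "input.mp3"),
    });
  } catch (error) {
    // Validation errors will not succeed on retry, so rethrow immediately
    if (error instanceof ElevenLabs.UnprocessableEntityError || attempt === maxAttempts) throw error;
    await new Promise((resolve) => setTimeout(resolve, 1000 * attempt)); // simple linear backoff
  }
}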