or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

docs

audio

audio-processing.md
realtime-transcription.md
speech-to-speech.md
speech-to-text.md
text-to-speech.md
index.md
tile.json

docs/audio/speech-to-speech.md

Speech-to-Speech

Transform audio from one voice to another while maintaining timing, emotion, and delivery. Perfect for voice conversion, accent transformation, and creating consistent voice performances across different recordings.

Quick Reference

import { ElevenLabsClient } from "@elevenlabs/elevenlabs-js";

const client = new ElevenLabsClient({ apiKey: "your-api-key" });
// Access this API via: client.speechToSpeech

Capabilities

Convert Speech to Speech

Transform audio input to a different voice while preserving prosody and emotional delivery.

/**
 * Converts the supplied audio to the target voice, preserving prosody and
 * emotional delivery, and resolves to the converted audio as a byte stream.
 *
 * Reference signature only: this block documents the call shape and is not
 * executable TypeScript as written.
 *
 * @param voice_id - Target voice ID
 * @param request - Audio file and conversion settings
 * @param requestOptions - Optional request configuration
 * @returns HttpResponsePromise resolving to a ReadableStream<Uint8Array> of converted audio
 * @throws UnprocessableEntityError if request fails
 */
client.speechToSpeech.convert(
  voice_id: string,
  request: BodySpeechToSpeechV1SpeechToSpeechVoiceIdPost,
  requestOptions?: RequestOptions
): HttpResponsePromise<ReadableStream<Uint8Array>>;

/**
 * Request payload for `client.speechToSpeech.convert`.
 *
 * Only `audio` is required; every other field is optional.
 *
 * NOTE(review): the last three fields (`enableLogging`,
 * `optimizeStreamingLatency`, `outputFormat`) are camelCase while the rest of
 * this interface, and the streaming request interface on this page, use
 * snake_case (`optimize_streaming_latency`, `output_format`). Confirm the
 * correct casing against the installed SDK version before relying on it.
 */
interface BodySpeechToSpeechV1SpeechToSpeechVoiceIdPost {
  /** Input audio file */
  audio: File | Blob;
  /** Model ID to use */
  model_id?: string;
  /** Voice settings override */
  voice_settings?: VoiceSettings;
  /** Seed for reproducibility */
  seed?: number;
  /** Remove background noise from input */
  remove_background_noise?: boolean;
  /** Input file format */
  file_format?: FileFormat;
  /** Enable request/response logging */
  enableLogging?: boolean;
  /** Optimize streaming latency (0-4) */
  optimizeStreamingLatency?: number;
  /** Output format */
  outputFormat?: string;
}

/**
 * Optional tuning values supplied through the `voice_settings` field of a
 * convert or stream request. Every field may be omitted.
 */
interface VoiceSettings {
  /** Stability (0.0 to 1.0) */
  stability?: number;
  /** Similarity boost (0.0 to 1.0) */
  similarity_boost?: number;
  /** Style exaggeration (0.0 to 1.0) */
  style?: number;
  /** Use speaker boost */
  use_speaker_boost?: boolean;
}

/**
 * Input audio formats accepted by the `file_format` request field.
 *
 * Declared as a const object plus a same-named union of string literals
 * instead of a TypeScript `enum`. The runtime object is unchanged, so
 * `FileFormat.WAV` still evaluates to `"wav"`, and a plain literal such as
 * `"wav"` is now also assignable to the `FileFormat` type. A string enum
 * rejects bare literals at compile time.
 */
const FileFormat = {
  MP3: "mp3",
  MP4: "mp4",
  MPEG: "mpeg",
  MPGA: "mpga",
  M4A: "m4a",
  WAV: "wav",
  WEBM: "webm",
} as const;

/** Union of the string values in the `FileFormat` object. */
type FileFormat = (typeof FileFormat)[keyof typeof FileFormat];

Stream Speech to Speech

Stream voice conversion with low latency.

/**
 * Streams a voice conversion of the supplied audio to the target voice,
 * resolving to a byte stream whose chunks can be consumed as they arrive.
 *
 * Reference signature only: this block documents the call shape and is not
 * executable TypeScript as written.
 *
 * @param voice_id - Target voice ID
 * @param request - Audio and streaming settings
 * @param requestOptions - Optional request configuration
 * @returns HttpResponsePromise resolving to a ReadableStream<Uint8Array> of converted audio chunks
 * @throws UnprocessableEntityError if request fails
 */
client.speechToSpeech.stream(
  voice_id: string,
  request: BodySpeechToSpeechStreamingV1SpeechToSpeechVoiceIdStreamPost,
  requestOptions?: RequestOptions
): HttpResponsePromise<ReadableStream<Uint8Array>>;

/**
 * Request payload for `client.speechToSpeech.stream`.
 *
 * Only `audio` is required; every other field is optional. All field names in
 * this interface are snake_case.
 */
interface BodySpeechToSpeechStreamingV1SpeechToSpeechVoiceIdStreamPost {
  /** Input audio file */
  audio: File | Blob;
  /** Model ID */
  model_id?: string;
  /** Voice settings */
  voice_settings?: VoiceSettings;
  /** Seed for reproducibility */
  seed?: number;
  /** Remove background noise */
  remove_background_noise?: boolean;
  /** Input file format */
  file_format?: FileFormat;
  /** Optimize streaming latency (0-4) */
  optimize_streaming_latency?: number;
  /** Output format */
  output_format?: string;
}

Usage Examples

Basic Voice Conversion

import { ElevenLabsClient } from "@elevenlabs/elevenlabs-js";
import { readFile, writeFile } from "fs/promises";

// Create the SDK client ("your-api-key" is a placeholder)
const client = new ElevenLabsClient({ apiKey: "your-api-key" });

// Load input audio
const inputAudio = await readFile("input.mp3");

// Convert to target voice
const convertedAudio = await client.speechToSpeech.convert(
  "target-voice-id",
  {
    audio: new File([inputAudio], "input.mp3"),
    model_id: "eleven_multilingual_sts_v2",
  }
);

// Save converted audio: the result is read chunk by chunk, then all chunks
// are concatenated and written to disk in a single call
const chunks: Uint8Array[] = [];
for await (const chunk of convertedAudio) {
  chunks.push(chunk);
}
await writeFile("output.mp3", Buffer.concat(chunks));

With Voice Settings

// Fine-tune voice conversion with custom settings
// (`client` and `inputAudio` are created as in the Basic Voice Conversion example)
const convertedAudio = await client.speechToSpeech.convert(
  "target-voice-id",
  {
    audio: new File([inputAudio], "input.mp3"),
    // Each numeric setting is documented as ranging from 0.0 to 1.0
    voice_settings: {
      stability: 0.7,
      similarity_boost: 0.8,
      style: 0.5,
      use_speaker_boost: true,
    },
  }
);

Remove Background Noise

// Load the noisy recording; `noisyAudio` was previously referenced without
// being defined anywhere in the examples
const noisyAudio = await readFile("noisy.mp3");

// Clean up noisy input audio during conversion
const convertedAudio = await client.speechToSpeech.convert(
  "target-voice-id",
  {
    audio: new File([noisyAudio], "noisy.mp3"),
    remove_background_noise: true,
  }
);

Streaming Conversion

// Stream conversion for lower latency
// (`client` and `inputAudio` are created as in the Basic Voice Conversion example)
const audioStream = await client.speechToSpeech.stream(
  "target-voice-id",
  {
    audio: new File([inputAudio], "input.mp3"),
    // Documented range for this field is 0-4
    optimize_streaming_latency: 3,
  }
);

// Process chunks as they arrive
for await (const chunk of audioStream) {
  // Play or process chunk immediately
  console.log("Received chunk:", chunk.length, "bytes");
}

Different Output Formats

// Convert with specific output format
// NOTE(review): if `FileFormat` is a TypeScript string enum, the bare literal
// "wav" below is not assignable to it and `FileFormat.WAV` would be required.
// Confirm against the SDK's actual typing of `file_format`.
const convertedAudio = await client.speechToSpeech.convert(
  "target-voice-id",
  {
    audio: new File([inputAudio], "input.wav"),
    file_format: "wav",
    outputFormat: "mp3_44100_128", // 44.1kHz, 128kbps MP3
  }
);

Reproducible Conversions

// Use seed for consistent results
const seed = 12345;

// Both calls send the same input bytes, voice ID and seed
const audio1 = await client.speechToSpeech.convert("voice-id", {
  audio: new File([inputAudio], "input.mp3"),
  seed,
});

const audio2 = await client.speechToSpeech.convert("voice-id", {
  audio: new File([inputAudio], "input.mp3"),
  seed,
});

// audio1 and audio2 are streams that are expected to yield the same audio.
// NOTE(review): confirm whether the API guarantees identical output for a
// fixed seed or only makes a best effort at determinism.

Batch Voice Conversion

import { readdir } from "fs/promises";
import { join } from "path";

// Convert multiple files to same voice
/**
 * Converts every `.mp3` entry directly inside `inputDir` to the given voice
 * and writes each result to `outputDir` under the same file name.
 *
 * Entries are processed one at a time, in the order `readdir` returns them.
 * Sub-directories are skipped even when their name ends in `.mp3`, because
 * reading one as a file would reject and abort the whole batch.
 *
 * @param inputDir - Directory scanned (non-recursively) for `.mp3` files
 * @param outputDir - Directory that receives the converted files; this
 *   function does not create it
 * @param voiceId - Target voice ID passed to `client.speechToSpeech.convert`
 * @returns Promise that resolves once every matching file has been converted
 *   and written; rejects on the first read, conversion, or write failure
 */
async function convertDirectory(
  inputDir: string,
  outputDir: string,
  voiceId: string
): Promise<void> {
  // withFileTypes exposes the entry kind so directories can be filtered out
  // without an extra stat call per entry.
  const entries = await readdir(inputDir, { withFileTypes: true });

  for (const entry of entries) {
    const file = entry.name;
    if (entry.isDirectory() || !file.endsWith(".mp3")) continue;

    console.log(`Converting ${file}...`);

    const inputPath = join(inputDir, file);
    const outputPath = join(outputDir, file);

    const inputAudio = await readFile(inputPath);
    const converted = await client.speechToSpeech.convert(voiceId, {
      audio: new File([inputAudio], file),
    });

    // Collect the streamed result fully before writing it out in one call.
    const chunks: Uint8Array[] = [];
    for await (const chunk of converted) {
      chunks.push(chunk);
    }
    await writeFile(outputPath, Buffer.concat(chunks));

    console.log(`Saved ${file}`);
  }
}

await convertDirectory("./inputs", "./outputs", "voice-id");

Accent Transformation

// Transform accent while keeping voice characteristics
// `britishVoice` labels the source accent only; it is not passed to the API.
const britishVoice = "british-voice-id";
const americanVoice = "american-voice-id";

// Load the source recording; `britishAudio` was previously referenced
// without being defined anywhere in the examples
const britishAudio = await readFile("british.mp3");

// Convert British accent to American
const convertedAudio = await client.speechToSpeech.convert(
  americanVoice,
  {
    audio: new File([britishAudio], "british.mp3"),
    voice_settings: {
      similarity_boost: 0.9, // Keep original voice characteristics
      stability: 0.8,
    },
  }
);

Voice Consistency Across Takes

// Run every recording take through the same voice, seed and settings
const targetVoice = "consistent-voice-id";

const takes = ["take1.mp3", "take2.mp3", "take3.mp3"];

for (const take of takes) {
  const takeBytes = await readFile(take);

  const convertedStream = await client.speechToSpeech.convert(targetVoice, {
    audio: new File([takeBytes], take),
    seed: 42, // Identical seed on every take
    voice_settings: {
      stability: 1.0, // Top of the documented 0.0-1.0 range
      similarity_boost: 1.0,
    },
  });

  // Drain the stream, then write the take next to the original with a prefix
  const parts: Uint8Array[] = [];
  for await (const part of convertedStream) {
    parts.push(part);
  }
  await writeFile(`converted_${take}`, Buffer.concat(parts));
}

Real-time Voice Transformation

import { createReadStream } from "fs";

// Stream conversion for real-time processing
/**
 * Reads the file at `inputPath`, starts a streaming conversion to `voiceId`,
 * and hands each converted chunk to `processAudioChunk` as it arrives.
 *
 * @param inputPath - Path of the audio file to convert; also used as the
 *   uploaded file's name
 * @param voiceId - Target voice ID
 * @returns Promise that resolves after the last chunk has been processed
 */
async function streamVoiceTransformation(
  inputPath: string,
  voiceId: string
): Promise<void> {
  const sourceBytes = await readFile(inputPath);

  const streamRequest = {
    audio: new File([sourceBytes], inputPath),
    optimize_streaming_latency: 4, // Top of the documented 0-4 range
    output_format: "pcm_16000",
  };
  const convertedChunks = await client.speechToSpeech.stream(
    voiceId,
    streamRequest
  );

  // Forward every chunk immediately (audio player, WebSocket, etc.)
  for await (const audioChunk of convertedChunks) {
    processAudioChunk(audioChunk);
  }
}

/**
 * Placeholder handler for one converted audio chunk: logs the chunk's size.
 *
 * @param chunk - Bytes of a single converted audio chunk
 */
function processAudioChunk(chunk: Uint8Array): void {
  const { length: byteCount } = chunk;
  console.log("Processing chunk:", byteCount);
}

Error Handling

// NOTE(review): `ElevenLabs` is not imported by any snippet on this page
// (only `ElevenLabsClient` is). Presumably it is the `ElevenLabs` export of
// "@elevenlabs/elevenlabs-js"; confirm and add it to the import before use.
try {
  const converted = await client.speechToSpeech.convert("voice-id", {
    audio: new File([inputAudio], "input.mp3"),
  });

  // Process converted audio
} catch (error) {
  // Report the documented validation error separately from any other failure
  if (error instanceof ElevenLabs.UnprocessableEntityError) {
    console.error("Validation error:", error.message);
  } else {
    console.error("Conversion failed:", error);
  }
}