Text-to-Dialogue

Generate multi-speaker dialogue from text. Each input pairs a voice ID with a line of text, and the API returns synchronized audio for the whole conversation, optionally with precise timing information for every character and voice segment.

Quick Reference

import { ElevenLabsClient } from "@elevenlabs/elevenlabs-js";

const client = new ElevenLabsClient({ apiKey: "your-api-key" });
// Access this API via: client.textToDialogue
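// apiKey falls back to process.env.ELEVENLABS_API_KEY when omitted:
// const client = new ElevenLabsClient();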

Capabilities

Convert Text to Dialogue

Generate dialogue audio from text segments assigned to different voices.

/**
 * @param request - Dialogue segments with voice assignments
 * @param requestOptions - Optional request configuration
 * @returns ReadableStream of dialogue audio
 * @throws UnprocessableEntityError if request fails
 */
client.textToDialogue.convert(
  request: BodyTextToDialogueMultiVoiceV1TextToDialoguePost,
  requestOptions?: RequestOptions
): HttpResponsePromise<ReadableStream<Uint8Array>>;

interface BodyTextToDialogueMultiVoiceV1TextToDialoguePost {
  /** Output format of the generated audio */
  outputFormat?: string;
  /** A list of dialogue inputs, each containing text and a voice ID which will be converted into speech. (REQUIRED) */
  inputs: DialogueInput[];
  /** Identifier of the model that will be used */
  modelId?: string;
  /** Language code (ISO 639-1) used to enforce a language for the model and text normalization */
  languageCode?: string;
  /** Settings controlling the dialogue generation */
  settings?: ModelSettingsResponseModel;
  /** A list of pronunciation dictionary locators (id, version_id) to be applied to the text (max 3) */
  pronunciationDictionaryLocators?: PronunciationDictionaryVersionLocator[];
  /** Seed for deterministic sampling (integer between 0 and 4294967295) */
  seed?: number;
  /** Text normalization mode: 'auto', 'on', or 'off' */
  applyTextNormalization?: "auto" | "on" | "off";
}

interface DialogueInput {
  /** Voice ID for this segment */
  voiceId: string;
  /** Text to speak */
  text: string;
  /** Optional per-segment voice settings (used in the examples below) */
  voiceSettings?: VoiceSettings;
}

interface VoiceSettings {
  /** Stability (0.0 to 1.0) */
  stability?: number;
  /** Similarity boost (0.0 to 1.0) */
  similarityBoost?: number;
  /** Style exaggeration (0.0 to 1.0) */
  style?: number;
  /** Use speaker boost */
  useSpeakerBoost?: boolean;
}

// Note: ModelSettingsResponseModel and PronunciationDictionaryVersionLocator
// are available via the ElevenLabs namespace: ElevenLabs.ModelSettingsResponseModel, etc.
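A hedged sketch exercising the optional request fields documented above; the voice IDs are placeholders, and "mp3_44100_128" is assumed to be a valid output format identifier.

const audio = await client.textToDialogue.convert({
  inputs: [
    { voiceId: "voice-id-1", text: "Bonjour !" },
    { voiceId: "voice-id-2", text: "Bonjour, comment ça va ?" },
  ],
  modelId: "eleven_multilingual_v2",
  outputFormat: "mp3_44100_128", // assumed format identifier
  languageCode: "fr", // enforce French for the model and text normalization
  seed: 42, // deterministic sampling for reproducible output
  applyTextNormalization: "auto",
});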

Stream Dialogue

Stream dialogue generation for lower latency.

/**
 * @param request - Dialogue segments for streaming
 * @param requestOptions - Optional request configuration
 * @returns ReadableStream of dialogue audio chunks
 * @throws UnprocessableEntityError if request fails
 */
client.textToDialogue.stream(
  request: BodyTextToDialogueMultiVoiceStreamingV1TextToDialogueStreamPost,
  requestOptions?: RequestOptions
): HttpResponsePromise<ReadableStream<Uint8Array>>;

interface BodyTextToDialogueMultiVoiceStreamingV1TextToDialogueStreamPost {
  /** Array of dialogue segments */
  inputs: DialogueInput[];
  /** Model ID */
  modelId?: string;
  /** Optimize streaming latency (0-4) */
  optimizeStreamingLatency?: number;
  /** Output format */
  outputFormat?: string;
}
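To persist the streamed audio in Node, one option (a sketch, assuming Node 18+) is to bridge the web ReadableStream to a Node stream:

import { createWriteStream } from "fs";
import { Readable } from "stream";
import { pipeline } from "stream/promises";

const stream = await client.textToDialogue.stream({
  inputs: [
    { voiceId: "voice-id-1", text: "Hello!" },
    { voiceId: "voice-id-2", text: "Hi there!" },
  ],
});

// Readable.fromWeb expects Node's own web-stream type; the cast papers
// over the DOM/Node typing mismatch and is safe at runtime in Node 18+.
await pipeline(
  Readable.fromWeb(stream as import("stream/web").ReadableStream<Uint8Array>),
  createWriteStream("dialogue.mp3")
);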

Convert with Timestamps

Generate dialogue with character-level timing and voice segment information.

/**
 * @param request - Dialogue segments with timestamp request
 * @param requestOptions - Optional request configuration
 * @returns Audio with timestamps and voice segment markers
 * @throws UnprocessableEntityError if request fails
 */
client.textToDialogue.convertWithTimestamps(
  request: BodyTextToDialogueFullWithTimestamps,
  requestOptions?: RequestOptions
): HttpResponsePromise<AudioWithTimestampsAndVoiceSegmentsResponseModel>;

interface BodyTextToDialogueFullWithTimestamps {
  /** Array of dialogue segments */
  inputs: DialogueInput[];
  /** Model ID */
  modelId?: string;
}

interface AudioWithTimestampsAndVoiceSegmentsResponseModel {
  /** Base64 encoded audio */
  audioBase64: string;
  /** Character-level alignment */
  alignment: Alignment;
  /** Voice segment information */
  voiceSegments: VoiceSegment[];
}

interface Alignment {
  /** Array of characters */
  characters: string[];
  /** Start time of each character, in seconds */
  characterStartTimesSeconds: number[];
  /** End time of each character, in seconds */
  characterEndTimesSeconds: number[];
}

interface VoiceSegment {
  /** Voice ID used */
  voiceId: string;
  /** Segment start time in seconds */
  startTime: number;
  /** Segment end time in seconds */
  endTime: number;
  /** Character start index */
  characterStartIndex: number;
  /** Character end index */
  characterEndIndex: number;
}
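Taken together, alignment and voiceSegments are enough to derive per-speaker captions. A minimal sketch, assuming characterEndIndex is exclusive (verify against actual responses):

function toCaptions(result: AudioWithTimestampsAndVoiceSegmentsResponseModel) {
  return result.voiceSegments.map((segment) => ({
    voiceId: segment.voiceId,
    start: segment.startTime,
    end: segment.endTime,
    // Recover the segment's text from the character-level alignment
    text: result.alignment.characters
      .slice(segment.characterStartIndex, segment.characterEndIndex)
      .join(""),
  }));
}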

Stream with Timestamps

Stream dialogue with timestamps as JSON chunks.

/**
 * @param request - Dialogue segments for streaming with timestamps
 * @param requestOptions - Optional request configuration
 * @returns Stream of JSON chunks with audio and timing data
 * @throws UnprocessableEntityError if request fails
 */
client.textToDialogue.streamWithTimestamps(
  request: BodyTextToDialogueStreamWithTimestamps,
  requestOptions?: RequestOptions
): HttpResponsePromise<Stream<StreamingAudioChunkWithTimestampsAndVoiceSegmentsResponseModel>>;

interface BodyTextToDialogueStreamWithTimestamps {
  /** Array of dialogue segments */
  inputs: DialogueInput[];
  /** Model ID */
  modelId?: string;
  /** Flush buffer after each chunk */
  flush?: boolean;
}

interface StreamingAudioChunkWithTimestampsAndVoiceSegmentsResponseModel {
  /** Base64 encoded audio chunk */
  audioBase64: string;
  /** Alignment for this chunk */
  alignment?: Alignment;
  /** Voice segments in this chunk */
  voiceSegments?: VoiceSegment[];
  /** Whether this is the final chunk */
  isFinal?: boolean;
}

Usage Examples

Basic Two-Speaker Dialogue

import { ElevenLabsClient } from "@elevenlabs/elevenlabs-js";
import { writeFile } from "fs/promises";

const client = new ElevenLabsClient({ apiKey: "your-api-key" });

// Create a conversation between two speakers
const dialogue = await client.textToDialogue.convert({
  inputs: [
    {
      voiceId: "voice-id-1",
      text: "Hello! How are you today?",
    },
    {
      voiceId: "voice-id-2",
      text: "I'm doing great, thanks for asking!",
    },
    {
      voiceId: "voice-id-1",
      text: "That's wonderful to hear.",
    },
  ],
  modelId: "eleven_multilingual_v2",
});

// Save the dialogue audio
const chunks: Uint8Array[] = [];
for await (const chunk of dialogue) {
  chunks.push(chunk);
}
await writeFile("conversation.mp3", Buffer.concat(chunks));

Multi-Speaker Podcast

// Create a podcast with three hosts
const podcastDialogue = await client.textToDialogue.convert({
  inputs: [
    {
      voiceId: "host1-voice-id",
      text: "Welcome to our podcast! I'm your first host.",
    },
    {
      voiceId: "host2-voice-id",
      text: "And I'm your second host. Great to be here!",
    },
    {
      voiceId: "host3-voice-id",
      text: "And I'm the third host. Let's dive into today's topic.",
    },
    {
      voiceId: "host1-voice-id",
      text: "Today we're discussing artificial intelligence.",
    },
  ],
});

Custom Voice Settings per Speaker

// Apply different voice settings to each speaker
const dialogue = await client.textToDialogue.convert({
  inputs: [
    {
      voiceId: "narrator-voice-id",
      text: "The story begins on a dark and stormy night.",
      voiceSettings: {
        stability: 0.8,
        similarity_boost: 0.9,
        style: 0.3,
      },
    },
    {
      voiceId: "character-voice-id",
      text: "Who's there?",
      voiceSettings: {
        stability: 0.5, // More expressive
        similarity_boost: 0.8,
        style: 0.7, // More dramatic
      },
    },
  ],
});
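Dialogue-wide generation settings go in the top-level settings field. The exact shape of ModelSettingsResponseModel lives in the ElevenLabs namespace; the stability field below is an assumption for illustration, so check the exported type before relying on it:

const narrated = await client.textToDialogue.convert({
  inputs: [
    { voiceId: "narrator-voice-id", text: "And so the night wore on." },
  ],
  settings: { stability: 0.7 }, // assumed field of ModelSettingsResponseModel
});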

Streaming Dialogue

// Stream dialogue for lower latency
const stream = await client.textToDialogue.stream({
  inputs: [
    { voiceId: "voice1", text: "This is the first line." },
    { voiceId: "voice2", text: "And this is the second." },
    { voiceId: "voice1", text: "Back to the first speaker." },
  ],
  optimizeStreamingLatency: 3,
});

for await (const chunk of stream) {
  // Process audio chunks as they arrive
  console.log("Received chunk:", chunk.length);
}

Dialogue with Timestamps

// Get timing information for each voice segment
const result = await client.textToDialogue.convertWithTimestamps({
  inputs: [
    { voiceId: "voice1", text: "Hello there!" },
    { voiceId: "voice2", text: "Hi! How are you?" },
  ],
  modelId: "eleven_turbo_v2_5",
});

console.log("Audio:", result.audio_base_64);

// Character-level timing
console.log("Characters:", result.alignment.characters);
console.log("Start times:", result.alignment.character_start_times_seconds);
console.log("End times:", result.alignment.character_end_times_seconds);

// Voice segment timing
for (const segment of result.voice_segments) {
  console.log(`Voice ${segment.voiceId}:`);
  console.log(`  Time: ${segment.start_time}s - ${segment.end_time}s`);
  console.log(`  Characters: ${segment.character_start_index} - ${segment.character_end_index}`);
}
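The returned audio is base64; decoding it with Node's Buffer yields the playable file:

import { writeFile } from "fs/promises";

await writeFile("timestamped-dialogue.mp3", Buffer.from(result.audioBase64, "base64"));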

Interview Dialogue

// Generate an interview conversation
async function generateInterview(
  questions: string[],
  answers: string[],
  interviewerVoice: string,
  intervieweeVoice: string
): Promise<ReadableStream<Uint8Array>> {
  const inputs: DialogueInput[] = [];

  for (let i = 0; i < questions.length; i++) {
    // Add question
    inputs.push({
      voiceId: interviewerVoice,
      text: questions[i],
    });

    // Add answer
    if (answers[i]) {
      inputs.push({
        voiceId: intervieweeVoice,
        text: answers[i],
      });
    }
  }

  return await client.textToDialogue.convert({
    inputs,
    modelId: "eleven_multilingual_v2",
  });
}

const interview = await generateInterview(
  [
    "What inspired you to start your company?",
    "What were the biggest challenges you faced?",
    "What advice would you give to aspiring entrepreneurs?",
  ],
  [
    "I saw a gap in the market that needed to be filled.",
    "The biggest challenge was definitely securing initial funding.",
    "My advice is to never give up, even when things get tough.",
  ],
  "interviewer-voice-id",
  "interviewee-voice-id"
);

Audiobook with Narrator and Characters

// Create audiobook with narrator and character voices
const audiobook = await client.textToDialogue.convert({
  inputs: [
    {
      voiceId: "narrator-voice",
      text: "Chapter One. Sarah walked into the room.",
    },
    {
      voiceId: "sarah-voice",
      text: "Is anyone here?",
    },
    {
      voiceId: "narrator-voice",
      text: "A voice responded from the shadows.",
    },
    {
      voiceId: "mysterious-voice",
      text: "I've been waiting for you.",
    },
  ],
});

Stream with Real-time Timestamps

// Stream dialogue with timing information
const stream = await client.textToDialogue.streamWithTimestamps({
  inputs: [
    { voiceId: "voice1", text: "Streaming dialogue example." },
    { voiceId: "voice2", text: "With timestamps included!" },
  ],
  flush: true,
});

for await (const chunk of stream) {
  console.log("Audio chunk:", chunk.audioBase64);

  if (chunk.alignment) {
    console.log("Characters:", chunk.alignment.characters);
  }

  if (chunk.voiceSegments) {
    console.log("Voice segments:", chunk.voiceSegments);
  }

  if (chunk.isFinal) {
    console.log("Final chunk received");
  }
}
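Because each chunk carries base64 audio, the full clip can be reassembled by decoding and concatenating. A sketch (requesting a fresh stream, since a stream can only be consumed once):

import { writeFile } from "fs/promises";

const replay = await client.textToDialogue.streamWithTimestamps({
  inputs: [{ voiceId: "voice1", text: "Chunk reassembly example." }],
});

const audioChunks: Buffer[] = [];
for await (const chunk of replay) {
  audioChunks.push(Buffer.from(chunk.audioBase64, "base64"));
}
await writeFile("streamed-dialogue.mp3", Buffer.concat(audioChunks));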

Educational Content

// Generate educational dialogue between teacher and student
const educationalContent = await client.textToDialogue.convert({
  inputs: [
    {
      voiceId: "teacher-voice",
      text: "Today we're learning about photosynthesis. Can anyone tell me what it is?",
    },
    {
      voiceId: "student-voice",
      text: "It's how plants make food from sunlight!",
    },
    {
      voiceId: "teacher-voice",
      text: "Exactly right! Plants use sunlight, water, and carbon dioxide.",
    },
    {
      voiceId: "student-voice",
      text: "And they produce oxygen as a byproduct, right?",
    },
    {
      voiceId: "teacher-voice",
      text: "Perfect! You've been paying attention.",
    },
  ],
});

Drama Script

// Convert a drama script to audio
interface ScriptLine {
  character: string;
  line: string;
}

const voiceMap: Record<string, string> = {
  "Alice": "alice-voice-id",
  "Bob": "bob-voice-id",
  "Narrator": "narrator-voice-id",
};

const script: ScriptLine[] = [
  { character: "Narrator", line: "Act 1, Scene 1. A park bench." },
  { character: "Alice", line: "I can't believe you said that!" },
  { character: "Bob", line: "I'm sorry, I didn't mean it." },
  { character: "Alice", line: "Actions speak louder than words, Bob." },
];

const dialogue = await client.textToDialogue.convert({
  inputs: script.map(line => ({
    voiceId: voiceMap[line.character],
    text: line.line,
  })),
});
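Note that voiceMap[line.character] silently yields undefined for an unmapped character, which would produce an invalid request. A defensive sketch that fails fast instead:

// Verify every character has a voice before converting
const unmapped = script.filter((line) => !(line.character in voiceMap));
if (unmapped.length > 0) {
  const names = [...new Set(unmapped.map((line) => line.character))];
  throw new Error(`No voice mapped for: ${names.join(", ")}`);
}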