Generate multi-speaker dialogue from text. Each dialogue segment pairs text with a voice ID, and responses can include precise timing information, so you can create natural, synchronized conversations between multiple speakers.
import { ElevenLabsClient } from "@elevenlabs/elevenlabs-js";
const client = new ElevenLabsClient({ apiKey: "your-api-key" });
// Access this API via: client.textToDialogue

Generate dialogue audio from text segments assigned to different voices.
/**
* @param request - Dialogue segments with voice assignments
* @param requestOptions - Optional request configuration
* @returns ReadableStream of dialogue audio
* @throws UnprocessableEntityError if request fails
*/
client.textToDialogue.convert(
request: BodyTextToDialogueMultiVoiceV1TextToDialoguePost,
requestOptions?: RequestOptions
): HttpResponsePromise<ReadableStream<Uint8Array>>;
interface BodyTextToDialogueMultiVoiceV1TextToDialoguePost {
/** Output format of the generated audio */
outputFormat?: string;
/** A list of dialogue inputs, each containing text and a voice ID which will be converted into speech. (REQUIRED) */
inputs: DialogueInput[];
/** Identifier of the model that will be used */
modelId?: string;
/** Language code (ISO 639-1) used to enforce a language for the model and text normalization */
languageCode?: string;
/** Settings controlling the dialogue generation */
settings?: ModelSettingsResponseModel;
/** A list of pronunciation dictionary locators (id, version_id) to be applied to the text (max 3) */
pronunciationDictionaryLocators?: PronunciationDictionaryVersionLocator[];
/** Seed for deterministic sampling (integer between 0 and 4294967295) */
seed?: number;
/** Text normalization mode: 'auto', 'on', or 'off' */
applyTextNormalization?: "auto" | "on" | "off";
}
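The usage examples later on this page only set inputs and modelId; the remaining optional fields are passed the same way. A minimal sketch (the voice IDs are placeholders, and the available outputFormat values depend on your subscription):

// Sketch: convert() with the optional request fields documented above.
// "voice-id-1" / "voice-id-2" are placeholder voice IDs.
const audio = await client.textToDialogue.convert({
  inputs: [
    { voiceId: "voice-id-1", text: "Good morning!" },
    { voiceId: "voice-id-2", text: "Good morning to you too." },
  ],
  modelId: "eleven_multilingual_v2",
  outputFormat: "mp3_44100_128",   // MP3, 44.1 kHz, 128 kbps
  languageCode: "en",              // enforce English normalization
  seed: 12345,                     // deterministic sampling
  applyTextNormalization: "auto",
});
// Consume the returned ReadableStream as shown in the examples below.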
interface DialogueInput {
/** Voice ID for this segment */
voiceId: string;
/** Text to speak */
text: string;
/** Optional per-segment voice settings (see VoiceSettings below) */
voiceSettings?: VoiceSettings;
}
interface VoiceSettings {
/** Stability (0.0 to 1.0) */
stability?: number;
/** Similarity boost (0.0 to 1.0) */
similarityBoost?: number;
/** Style exaggeration (0.0 to 1.0) */
style?: number;
/** Use speaker boost */
useSpeakerBoost?: boolean;
}
// Note: ModelSettingsResponseModel and PronunciationDictionaryVersionLocator
// are available via the ElevenLabs namespace: ElevenLabs.ModelSettingsResponseModel, etc.

Stream dialogue generation for lower latency.
/**
* @param request - Dialogue segments for streaming
* @param requestOptions - Optional request configuration
* @returns ReadableStream of dialogue audio chunks
* @throws UnprocessableEntityError if request fails
*/
client.textToDialogue.stream(
request: BodyTextToDialogueMultiVoiceStreamingV1TextToDialogueStreamPost,
requestOptions?: RequestOptions
): HttpResponsePromise<ReadableStream<Uint8Array>>;
interface BodyTextToDialogueMultiVoiceStreamingV1TextToDialogueStreamPost {
/** Array of dialogue segments */
inputs: DialogueInput[];
/** Identifier of the model that will be used */
modelId?: string;
/** Latency optimization level (0-4); higher values reduce latency at some cost to quality */
optimizeStreamingLatency?: number;
/** Output format of the generated audio */
outputFormat?: string;
}

Generate dialogue with character-level timing and voice segment information.
/**
* @param request - Dialogue segments with timestamp request
* @param requestOptions - Optional request configuration
* @returns Audio with timestamps and voice segment markers
* @throws UnprocessableEntityError if request fails
*/
client.textToDialogue.convertWithTimestamps(
request: BodyTextToDialogueFullWithTimestamps,
requestOptions?: RequestOptions
): HttpResponsePromise<AudioWithTimestampsAndVoiceSegmentsResponseModel>;
interface BodyTextToDialogueFullWithTimestamps {
/** Array of dialogue segments */
inputs: DialogueInput[];
/** Model ID */
modelId?: string;
}
interface AudioWithTimestampsAndVoiceSegmentsResponseModel {
/** Base64-encoded audio */
audioBase64: string;
/** Character-level alignment */
alignment: Alignment;
/** Voice segment information */
voiceSegments: VoiceSegment[];
}
interface Alignment {
/** Array of characters */
characters: string[];
/** Start time of each character in seconds */
characterStartTimesSeconds: number[];
/** End time of each character in seconds */
characterEndTimesSeconds: number[];
}
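The character-level alignment can be regrouped into coarser units such as words. A sketch, assuming the field names documented above and a placeholder voice ID; it also shows decoding the base64 audio before writing it to disk:

// Sketch: decode the audio and derive word-level timings from the
// character-level alignment returned by convertWithTimestamps.
import { writeFile } from "fs/promises";

const result = await client.textToDialogue.convertWithTimestamps({
  inputs: [{ voiceId: "voice-id-1", text: "Hello there, friend." }],
});

// The audio arrives base64-encoded; decode it before writing to disk.
await writeFile("dialogue.mp3", Buffer.from(result.audioBase64, "base64"));

// Group consecutive non-whitespace characters into words with start/end times.
interface WordTiming { word: string; start: number; end: number; }
const words: WordTiming[] = [];
let current: WordTiming | null = null;
result.alignment.characters.forEach((char, i) => {
  if (char.trim() === "") {
    current = null; // whitespace ends the current word
    return;
  }
  if (!current) {
    current = { word: "", start: result.alignment.characterStartTimesSeconds[i], end: 0 };
    words.push(current);
  }
  current.word += char;
  current.end = result.alignment.characterEndTimesSeconds[i];
});
console.log(words); // e.g. [{ word: "Hello", start: 0, end: 0.32 }, ...]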
interface VoiceSegment {
/** Voice ID used for this segment */
voiceId: string;
/** Segment start time in seconds */
startTime: number;
/** Segment end time in seconds */
endTime: number;
/** Index of the first character in this segment */
characterStartIndex: number;
/** Index of the last character in this segment */
characterEndIndex: number;
}

Stream dialogue with timestamps as JSON chunks.
/**
* @param request - Dialogue segments for streaming with timestamps
* @param requestOptions - Optional request configuration
* @returns Stream of JSON chunks with audio and timing data
* @throws UnprocessableEntityError if request fails
*/
client.textToDialogue.streamWithTimestamps(
request: BodyTextToDialogueStreamWithTimestamps,
requestOptions?: RequestOptions
): HttpResponsePromise<Stream<StreamingAudioChunkWithTimestampsAndVoiceSegmentsResponseModel>>;
interface BodyTextToDialogueStreamWithTimestamps {
/** Array of dialogue segments */
inputs: DialogueInput[];
/** Model ID */
modelId?: string;
/** Flush buffer after each chunk */
flush?: boolean;
}
interface StreamingAudioChunkWithTimestampsAndVoiceSegmentsResponseModel {
/** Base64-encoded audio chunk */
audioBase64: string;
/** Alignment for this chunk */
alignment?: Alignment;
/** Voice segments in this chunk */
voiceSegments?: VoiceSegment[];
/** Whether this is the final chunk */
isFinal?: boolean;
}

import { ElevenLabsClient } from "@elevenlabs/elevenlabs-js";
import { writeFile } from "fs/promises";
const client = new ElevenLabsClient({ apiKey: "your-api-key" });
// Create a conversation between two speakers
const dialogue = await client.textToDialogue.convert({
inputs: [
{
voiceId: "voice-id-1",
text: "Hello! How are you today?",
},
{
voiceId: "voice-id-2",
text: "I'm doing great, thanks for asking!",
},
{
voiceId: "voice-id-1",
text: "That's wonderful to hear.",
},
],
modelId: "eleven_multilingual_v2",
});
// Save the dialogue audio
const chunks: Uint8Array[] = [];
for await (const chunk of dialogue) {
chunks.push(chunk);
}
await writeFile("conversation.mp3", Buffer.concat(chunks));// Create a podcast with three hosts
const podcastDialogue = await client.textToDialogue.convert({
inputs: [
{
voiceId: "host1-voice-id",
text: "Welcome to our podcast! I'm your first host.",
},
{
voiceId: "host2-voice-id",
text: "And I'm your second host. Great to be here!",
},
{
voiceId: "host3-voice-id",
text: "And I'm the third host. Let's dive into today's topic.",
},
{
voiceId: "host1-voice-id",
text: "Today we're discussing artificial intelligence.",
},
],
});

// Apply different voice settings to each speaker
const dialogue = await client.textToDialogue.convert({
inputs: [
{
voiceId: "narrator-voice-id",
text: "The story begins on a dark and stormy night.",
voiceSettings: {
stability: 0.8,
similarityBoost: 0.9,
style: 0.3,
},
},
{
voiceId: "character-voice-id",
text: "Who's there?",
voiceSettings: {
stability: 0.5, // More expressive
similarityBoost: 0.8,
style: 0.7, // More dramatic
},
},
],
});

// Stream dialogue for lower latency
const stream = await client.textToDialogue.stream({
inputs: [
{ voiceId: "voice1", text: "This is the first line." },
{ voiceId: "voice2", text: "And this is the second." },
{ voiceId: "voice1", text: "Back to the first speaker." },
],
optimizeStreamingLatency: 3,
});
for await (const chunk of stream) {
// Process audio chunks as they arrive
console.log("Received chunk:", chunk.length);
}

// Get timing information for each voice segment
const result = await client.textToDialogue.convertWithTimestamps({
inputs: [
{ voiceId: "voice1", text: "Hello there!" },
{ voiceId: "voice2", text: "Hi! How are you?" },
],
modelId: "eleven_turbo_v2_5",
});
console.log("Audio:", result.audio_base_64);
// Character-level timing
console.log("Characters:", result.alignment.characters);
console.log("Start times:", result.alignment.character_start_times_seconds);
console.log("End times:", result.alignment.character_end_times_seconds);
// Voice segment timing
for (const segment of result.voice_segments) {
console.log(`Voice ${segment.voiceId}:`);
console.log(` Time: ${segment.start_time}s - ${segment.end_time}s`);
console.log(` Characters: ${segment.character_start_index} - ${segment.character_end_index}`);
}

// Generate an interview conversation
async function generateInterview(
questions: string[],
answers: string[],
interviewerVoice: string,
intervieweeVoice: string
): Promise<ReadableStream<Uint8Array>> {
const inputs: DialogueInput[] = [];
for (let i = 0; i < questions.length; i++) {
// Add question
inputs.push({
voiceId: interviewerVoice,
text: questions[i],
});
// Add answer
if (answers[i]) {
inputs.push({
voiceId: intervieweeVoice,
text: answers[i],
});
}
}
return await client.textToDialogue.convert({
inputs,
modelId: "eleven_multilingual_v2",
});
}
const interview = await generateInterview(
[
"What inspired you to start your company?",
"What were the biggest challenges you faced?",
"What advice would you give to aspiring entrepreneurs?",
],
[
"I saw a gap in the market that needed to be filled.",
"The biggest challenge was definitely securing initial funding.",
"My advice is to never give up, even when things get tough.",
],
"interviewer-voice-id",
"interviewee-voice-id"
);

// Create audiobook with narrator and character voices
const audiobook = await client.textToDialogue.convert({
inputs: [
{
voiceId: "narrator-voice",
text: "Chapter One. Sarah walked into the room.",
},
{
voiceId: "sarah-voice",
text: "Is anyone here?",
},
{
voiceId: "narrator-voice",
text: "A voice responded from the shadows.",
},
{
voiceId: "mysterious-voice",
text: "I've been waiting for you.",
},
],
});

// Stream dialogue with timing information
const stream = await client.textToDialogue.streamWithTimestamps({
inputs: [
{ voiceId: "voice1", text: "Streaming dialogue example." },
{ voiceId: "voice2", text: "With timestamps included!" },
],
flush: true,
});
for await (const chunk of stream) {
console.log("Audio chunk:", chunk.audio_base_64);
if (chunk.alignment) {
console.log("Characters:", chunk.alignment.characters);
}
if (chunk.voice_segments) {
console.log("Voice segments:", chunk.voice_segments);
}
if (chunk.is_final) {
console.log("Final chunk received");
}
}

// Generate educational dialogue between teacher and student
const educationalContent = await client.textToDialogue.convert({
inputs: [
{
voiceId: "teacher-voice",
text: "Today we're learning about photosynthesis. Can anyone tell me what it is?",
},
{
voiceId: "student-voice",
text: "It's how plants make food from sunlight!",
},
{
voiceId: "teacher-voice",
text: "Exactly right! Plants use sunlight, water, and carbon dioxide.",
},
{
voiceId: "student-voice",
text: "And they produce oxygen as a byproduct, right?",
},
{
voiceId: "teacher-voice",
text: "Perfect! You've been paying attention.",
},
],
});

// Convert a drama script to audio
interface ScriptLine {
character: string;
line: string;
}
const voiceMap: Record<string, string> = {
"Alice": "alice-voice-id",
"Bob": "bob-voice-id",
"Narrator": "narrator-voice-id",
};
const script: ScriptLine[] = [
{ character: "Narrator", line: "Act 1, Scene 1. A park bench." },
{ character: "Alice", line: "I can't believe you said that!" },
{ character: "Bob", line: "I'm sorry, I didn't mean it." },
{ character: "Alice", line: "Actions speak louder than words, Bob." },
];
const dialogue = await client.textToDialogue.convert({
inputs: script.map(line => ({
voiceId: voiceMap[line.character],
text: line.line,
})),
});
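
All of the methods above are documented as throwing UnprocessableEntityError when a request is rejected (for example, an unknown voice ID). The exact error class export can vary between SDK versions, so this sketch catches generically:

// Sketch: basic error handling around dialogue generation. The @throws
// annotations above name UnprocessableEntityError; the import path for that
// class may differ by SDK version, so this catch block stays generic.
try {
  const audio = await client.textToDialogue.convert({
    inputs: [{ voiceId: "possibly-invalid-voice-id", text: "Hello!" }],
  });
  // ... consume the ReadableStream as in the earlier examples
} catch (error) {
  // Log and rethrow (or fall back) depending on your application's needs.
  console.error("Dialogue generation failed:", error);
  throw error;
}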