BETA: This API is subject to change without regard to backwards compatibility.
The Beta Conversational AI SDK provides low-level client-side classes for building real-time conversational AI applications. These classes enable direct WebSocket-based conversations with AI agents, client-side tool execution, and custom audio interface implementations.
This SDK is included in the @elevenlabs/elevenlabs-js package but requires direct import from internal paths as these features are not re-exported from the main entry point.
The Conversation class provides real-time conversational AI session management with WebSocket communication, audio streaming, and event handling.
import { Conversation } from "@elevenlabs/elevenlabs-js/api/resources/conversationalAi/conversation/Conversation";

class Conversation extends EventEmitter {
constructor(options: {
/** ElevenLabsClient instance (optional, creates new client if not provided) */
client?: ElevenLabsClient;
/** Conversation client interface (optional, uses main client by default) */
conversationClient?: ConversationClient;
/** WebSocket factory for creating connections (optional, uses default) */
webSocketFactory?: WebSocketFactory;
/** ID of the agent to connect to */
agentId: string;
/** Whether authentication is required */
requiresAuth: boolean;
/** Audio interface implementation for audio I/O */
audioInterface: AudioInterface;
/** Optional conversation configuration */
config?: ConversationInitiationData;
/** Optional client tools for tool execution */
clientTools?: ClientTools;
/** Callback for agent text responses */
callbackAgentResponse?: (response: string) => void;
/** Callback for agent response corrections */
callbackAgentResponseCorrection?: (original: string, corrected: string) => void;
/** Callback for user transcript updates */
callbackUserTranscript?: (transcript: string) => void;
/** Callback for latency measurements */
callbackLatencyMeasurement?: (latencyMs: number) => void;
/** Callback for all WebSocket messages */
callbackMessageReceived?: (message: any) => void;
});
}

/**
* Starts the conversation session.
*
* Establishes WebSocket connection, initializes audio interface, and begins
* the conversation loop. Will run until endSession is called.
*
* @returns Promise that resolves when connection is established
*/
async startSession(): Promise<void>;
/**
* Ends the conversation session and cleans up resources.
*
* Stops audio interface, closes WebSocket connection, and emits 'session_ended' event.
*/
endSession(): void;

/**
* Send a text message from the user to the agent.
*
* @param text - The text message to send to the agent
* @throws Error if session is not started or WebSocket not connected
*/
sendUserMessage(text: string): void;
/**
* Send a contextual update to the conversation.
*
* Contextual updates are non-interrupting content that updates conversation
* state without directly prompting the agent. Useful for providing additional
* context or information that may inform agent responses.
*
* @param text - The contextual information to send
* @throws Error if session is not started or WebSocket not connected
*/
sendContextualUpdate(text: string): void;
/**
* Register user activity to prevent session timeout.
*
* Sends a ping to the orchestrator to reset the timeout timer. Call periodically
* during user interaction to keep the session alive.
*
* @throws Error if session is not started or WebSocket not connected
*/
registerUserActivity(): void;

/**
* Get the conversation ID if available.
*
* The conversation ID is assigned after the session starts and can be used
* for retrieving conversation history or analytics.
*
* @returns The conversation ID or undefined if not yet available
*/
getConversationId(): string | undefined;
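For example, the ID can be captured once available and used later to look up the conversation. A minimal sketch, assuming a connected conversation and the main client from the usage example later in this section; the conversations lookup call is an assumption about the main SDK surface, so verify the exact method against your SDK version:

const conversationId = conversation.getConversationId();
if (conversationId !== undefined) {
  // Hypothetical history lookup via the main client (assumption)
  const details = await client.conversationalAi.conversations.get(conversationId);
  console.log("Conversation details:", details);
}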
/**
* Check if the session is currently active.
*
* @returns True if the session is active and WebSocket is connected
*/
isSessionActive(): boolean;

The Conversation class extends EventEmitter and emits the following events:
// Emitted when the WebSocket connection is established
conversation.on("session_started", () => void);
// Emitted when a conversation ID is assigned
conversation.on("conversation_started", (conversationId: string) => void);
// Emitted when the session ends
conversation.on("session_ended", (conversationId?: string, code?: number, reason?: string) => void);
// Emitted when an error occurs
conversation.on("error", (error: Error) => void);import { Conversation } from "@elevenlabs/elevenlabs-js/api/resources/conversationalAi/conversation/Conversation";
import { Conversation } from "@elevenlabs/elevenlabs-js/api/resources/conversationalAi/conversation/Conversation";
import { ElevenLabsClient } from "@elevenlabs/elevenlabs-js";
import { AudioInterface } from "@elevenlabs/elevenlabs-js/api/resources/conversationalAi/conversation/AudioInterface";
import { ClientTools } from "@elevenlabs/elevenlabs-js/api/resources/conversationalAi/conversation/ClientTools";
// Create audio interface implementation
class MyAudioInterface extends AudioInterface {
start(inputCallback: (audio: Buffer) => void): void {
// Initialize microphone and start capturing audio
// Call inputCallback with 16-bit PCM mono 16kHz audio chunks
}
stop(): void {
// Stop audio capture and cleanup
}
output(audio: Buffer): void {
// Play audio to speaker (16-bit PCM mono 16kHz)
}
interrupt(): void {
// Stop current audio playback
}
}
// Initialize client and tools
const client = new ElevenLabsClient({ apiKey: "your-api-key" });
const audioInterface = new MyAudioInterface();
const clientTools = new ClientTools();
// Register client-side tools
clientTools.register("get_current_time", () => {
return new Date().toISOString();
});
// Create conversation
const conversation = new Conversation({
client,
agentId: "your-agent-id",
requiresAuth: true,
audioInterface,
clientTools,
callbackAgentResponse: (response) => {
console.log("Agent:", response);
},
callbackUserTranscript: (transcript) => {
console.log("User:", transcript);
},
});
// Handle events
conversation.on("session_started", () => {
console.log("Session started");
});
conversation.on("conversation_started", (conversationId) => {
console.log("Conversation ID:", conversationId);
});
conversation.on("error", (error) => {
console.error("Error:", error);
});
// Start the session
await conversation.startSession();
// Send text messages
conversation.sendUserMessage("Hello, how are you?");
// Send contextual updates
conversation.sendContextualUpdate("User is viewing the dashboard");
// Keep session alive
setInterval(() => {
if (conversation.isSessionActive()) {
conversation.registerUserActivity();
}
}, 30000);
// End session when done
conversation.endSession();

The ClientTools class handles registration and execution of client-side tools that can be called by the AI agent. It supports both synchronous and asynchronous tools with non-blocking execution.
import { ClientTools } from "@elevenlabs/elevenlabs-js/api/resources/conversationalAi/conversation/ClientTools";

class ClientTools {
constructor();
}

/**
* Register a new tool that can be called by the AI agent.
*
* @param toolName - Unique identifier for the tool
* @param handler - Function that implements the tool's logic
* @param isAsync - Whether handler is async (auto-detected if not specified)
* @throws Error if handler is not a function or tool name already registered
*/
register(
toolName: string,
handler: (parameters: Record<string, any>) => any | Promise<any>,
isAsync?: boolean
): void;
/**
* Unregister a tool.
*
* @param toolName - Name of the tool to unregister
* @returns True if the tool was found and removed
*/
unregister(toolName: string): boolean;
/**
* Clear all registered tools.
*/
clear(): void;

/**
* Execute a registered tool with the given parameters.
*
* @param toolName - Name of the tool to execute
* @param parameters - Parameters to pass to the tool
* @returns The result of the tool execution
* @throws Error if tool is not registered
*/
async handle(toolName: string, parameters: Record<string, any>): Promise<any>;
/**
* Execute a tool and send its result via callback.
*
* Non-blocking method that handles both sync and async tools. Executes in
* a separate event loop tick to avoid blocking the main conversation thread.
*
* @param toolName - Name of the tool to execute
* @param parameters - Parameters (must include tool_call_id)
* @param callback - Function to call with the result
*/
executeToolAsync(
toolName: string,
parameters: Record<string, any>,
callback: (response: ClientToolResultEvent) => void
): void;

/**
* Get a list of all registered tool names.
*
* @returns Array of tool names
*/
getRegisteredTools(): string[];
/**
* Check if a tool is registered.
*
* @param toolName - Name of the tool to check
* @returns True if the tool is registered
*/
isToolRegistered(toolName: string): boolean;

Synchronous Tool:
import { ClientTools } from "@elevenlabs/elevenlabs-js/api/resources/conversationalAi/conversation/ClientTools";
const tools = new ClientTools();
// Register synchronous tool
tools.register("calculate_sum", (params) => {
const { a, b } = params;
return a + b;
});
// Execute tool
const result = await tools.handle("calculate_sum", { a: 5, b: 3 });
console.log(result); // 8

Asynchronous Tool:
// Register async tool
tools.register("fetch_weather", async (params) => {
const { city } = params;
const response = await fetch(`https://api.weather.com/${city}`);
return await response.json();
});
// Execute tool
const weather = await tools.handle("fetch_weather", { city: "London" });

Tool Management:
// Check if tool exists
if (tools.isToolRegistered("calculate_sum")) {
console.log("Tool is registered");
}
// List all tools
console.log("Available tools:", tools.getRegisteredTools());
// Unregister a tool
tools.unregister("calculate_sum");
// Clear all tools
tools.clear();
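Non-Blocking Execution: a sketch of executeToolAsync; the tool name, handler, and tool_call_id value are made up for illustration, and the result event is simply logged since the ClientToolResultEvent shape is not reproduced here:

// Register a tool, then execute it without blocking the conversation loop.
tools.register("lookup_order", async (params) => {
  return { orderId: params.order_id, status: "shipped" };
});

tools.executeToolAsync(
  "lookup_order",
  // tool_call_id is required; in a live session it is supplied by the agent's tool call
  { tool_call_id: "tool-call-123", order_id: "A-42" },
  (response) => {
    // response is a ClientToolResultEvent; the Conversation class normally
    // forwards it to the agent over the WebSocket
    console.log("Tool result event:", response);
  }
);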
The AudioInterface class is an abstract base class for implementing custom audio input/output handling in conversational AI applications. It provides the interface contract for real-time audio processing.

import { AudioInterface } from "@elevenlabs/elevenlabs-js/api/resources/conversationalAi/conversation/AudioInterface";

abstract class AudioInterface {
/**
* Starts the audio interface.
*
* Called once before the conversation starts. The inputCallback should be
* called regularly with input audio chunks from the user. Audio must be in
* 16-bit PCM mono format at 16kHz. Recommended chunk size is 4000 samples
* (250 milliseconds).
*
* @param inputCallback - Function to call with audio chunks from microphone
*/
abstract start(inputCallback: (audio: Buffer) => void): void;
/**
* Stops the audio interface.
*
* Called once after the conversation ends. Should clean up any resources
* used by the audio interface and stop any audio streams. Do not call the
* inputCallback from start after this method is called.
*/
abstract stop(): void;
/**
* Output audio to the user.
*
* The audio passed to this method is in 16-bit PCM mono format at 16kHz. Implementations
* can choose to do additional buffering. This method should return quickly
* and not block the calling thread.
*
* @param audio - Audio data to output to the speaker
*/
abstract output(audio: Buffer): void;
/**
* Interruption signal to stop any audio output.
*
* User has interrupted the agent and all previously buffered audio output
* should be stopped immediately.
*/
abstract interrupt(): void;
}

All audio must be:
- 16-bit PCM encoded
- Mono (single channel)
- Sampled at 16kHz
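For sizing capture buffers, the byte math follows directly from that format. A minimal sketch (the helper name is illustrative, not part of the SDK):

// 16-bit PCM mono at 16kHz: 2 bytes per sample, 16000 samples per second
function chunkBytesForMs(ms: number): number {
  const SAMPLE_RATE = 16000;
  const BYTES_PER_SAMPLE = 2;
  return Math.round((ms / 1000) * SAMPLE_RATE) * BYTES_PER_SAMPLE;
}

console.log(chunkBytesForMs(250)); // 8000 bytes = the recommended 4000-sample input chunk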
import { AudioInterface } from "@elevenlabs/elevenlabs-js/api/resources/conversationalAi/conversation/AudioInterface";
import { Readable, Writable } from "stream";
// Example using a hypothetical audio library; the declarations below stand in
// for whatever microphone/speaker bindings you actually use.
declare function getMicrophoneStream(options: { sampleRate: number; channels: number; bitDepth: number }): Readable;
declare function getSpeakerStream(options: { sampleRate: number; channels: number; bitDepth: number }): Writable;
class CustomAudioInterface extends AudioInterface {
private micStream?: Readable;
private speakerStream?: Writable;
private inputCallback?: (audio: Buffer) => void;
private audioBuffer: Buffer[] = [];
private isPlaying: boolean = false;
start(inputCallback: (audio: Buffer) => void): void {
this.inputCallback = inputCallback;
// Initialize microphone stream (16kHz, 16-bit PCM, mono)
this.micStream = getMicrophoneStream({
sampleRate: 16000,
channels: 1,
bitDepth: 16,
});
// Capture audio chunks and send to callback
this.micStream.on("data", (chunk: Buffer) => {
if (this.inputCallback) {
this.inputCallback(chunk);
}
});
// Initialize speaker stream
this.speakerStream = getSpeakerStream({
sampleRate: 16000,
channels: 1,
bitDepth: 16,
});
}
stop(): void {
// Stop microphone
if (this.micStream) {
this.micStream.destroy();
this.micStream = undefined;
}
// Stop speaker
if (this.speakerStream) {
this.speakerStream.end();
this.speakerStream = undefined;
}
// Clear buffers
this.audioBuffer = [];
this.inputCallback = undefined;
this.isPlaying = false;
}
output(audio: Buffer): void {
// Add to buffer
this.audioBuffer.push(audio);
// Start playing if not already playing
if (!this.isPlaying && this.speakerStream) {
this.playNextChunk();
}
}
interrupt(): void {
// Clear buffered audio
this.audioBuffer = [];
// Stop current playback
if (this.speakerStream) {
this.speakerStream.cork();
this.speakerStream.uncork();
}
this.isPlaying = false;
}
private playNextChunk(): void {
if (this.audioBuffer.length === 0) {
this.isPlaying = false;
return;
}
this.isPlaying = true;
const chunk = this.audioBuffer.shift()!;
if (this.speakerStream) {
this.speakerStream.write(chunk, () => {
this.playNextChunk();
});
}
}
}

DefaultAudioInterface is a basic reference implementation of AudioInterface for Node.js. It provides a foundation for audio I/O but is intended primarily as a starting point for custom implementations.
import { DefaultAudioInterface } from "@elevenlabs/elevenlabs-js/api/resources/conversationalAi/conversation/DefaultAudioInterface";

/**
* Default audio interface implementation for Node.js using basic audio processing.
*
* Note: This is a basic placeholder implementation. For production use, integrate
* professional audio libraries like 'naudiodon', 'mic'/'speaker', or similar.
*/
class DefaultAudioInterface extends AudioInterface {
/** Audio input buffer size: 4000 frames (250ms at 16kHz) */
static readonly INPUT_FRAMES_PER_BUFFER = 4000;
/** Audio output buffer size: 1000 frames (62.5ms at 16kHz) */
static readonly OUTPUT_FRAMES_PER_BUFFER = 1000;
/** Sample rate: 16kHz (required by ElevenLabs) */
static readonly SAMPLE_RATE = 16000;
/** Channels: 1 (mono, required by ElevenLabs) */
static readonly CHANNELS = 1;
/**
* Starts the audio interface.
* @param inputCallback - Function to call with audio chunks from the microphone
*/
start(inputCallback: (audio: Buffer) => void): void;
/**
* Stops the audio interface and cleans up resources.
*/
stop(): void;
/**
* Output audio to the user (queues audio for playback).
* @param audio - Audio data to output to the speaker
*/
output(audio: Buffer): void;
/**
* Interruption signal to stop any audio output (clears playback queue).
*/
interrupt(): void;
}
/**
* Factory function to create a DefaultAudioInterface instance.
*/
function createDefaultAudioInterface(): DefaultAudioInterface;
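The factory is interchangeable with calling the constructor directly; either form works in the usage example below:

const audioInterface = createDefaultAudioInterface(); // equivalent to new DefaultAudioInterface()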
import { Conversation } from "@elevenlabs/elevenlabs-js/api/resources/conversationalAi/conversation/Conversation";
import { DefaultAudioInterface } from "@elevenlabs/elevenlabs-js/api/resources/conversationalAi/conversation/DefaultAudioInterface";
import { ElevenLabsClient } from "@elevenlabs/elevenlabs-js";
const client = new ElevenLabsClient({ apiKey: "your-api-key" });
const audioInterface = new DefaultAudioInterface();
const conversation = new Conversation({
client,
agentId: "your-agent-id",
requiresAuth: true,
audioInterface,
callbackAgentResponse: (text) => console.log("Agent:", text),
callbackUserTranscript: (text) => console.log("User:", text),
});
await conversation.startSession();

Important: The DefaultAudioInterface is a placeholder implementation. For real audio I/O, integrate a dedicated audio library:
- mic, naudiodon, or node-record-lpcm16 for microphone input
- speaker, naudiodon, or similar for audio output

For production applications, extend DefaultAudioInterface or implement AudioInterface directly with proper audio device integration.
ConversationInitiationData describes the configuration for initializing a conversation session.
interface ConversationInitiationData {
/** Additional body parameters for custom LLM endpoints */
extraBody?: Record<string, any>;
/** Override conversation configuration settings */
conversationConfigOverride?: Record<string, any>;
/** Dynamic variables to pass to the conversation */
dynamicVariables?: Record<string, any>;
}

Note: Use requiresAuth: true for production applications.
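A minimal sketch of passing this configuration to a Conversation; the audioInterface is assumed to be constructed as in the earlier examples, and the inner override keys (shown in snake_case wire format) are an assumption to verify against your agent's configuration:

const config = {
  // Values substituted into the agent's prompt templates
  dynamicVariables: { user_name: "Ada" },
  conversationConfigOverride: {
    // Inner keys follow the snake_case wire format (assumption)
    agent: {
      first_message: "Hi Ada, how can I help?",
      language: "en",
    },
  },
};

const conversation = new Conversation({
  agentId: "your-agent-id",
  requiresAuth: true,
  audioInterface,
  config,
});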