// Complete type definitions for conversational AI agent configuration,
// including conversation settings, TTS/ASR configuration, LLM models, and
// all related interfaces.
import { ElevenLabsClient } from "@elevenlabs/elevenlabs-js";
// Use these types when creating or configuring agents
const client = new ElevenLabsClient({ apiKey: "your-api-key" });Complete type definitions for agent configuration.
/**
 * Top-level configuration for a conversational AI agent session.
 *
 * Only `agent` is required; every subsystem section is optional and falls
 * back to server-side defaults when omitted.
 */
interface ConversationalConfig {
  /** Core agent behaviour (first message, language, prompt, …) */
  agent: AgentConfig;
  /** Session-level options such as duration limits and client events */
  conversation?: ConversationConfig;
  /** Speech-to-text (transcription) settings */
  asr?: AsrConversationalConfig;
  /** Text-to-speech (voice output) settings */
  tts?: TtsConversationalConfigOutput;
  /** Voice-activity-detection settings */
  vad?: VadConfig;
  /** Turn-taking behaviour between agent and user */
  turn?: TurnConfig;
}
/**
 * Session-level conversation settings.
 */
interface ConversationConfig {
  /** Skip audio processing entirely and exchange text only */
  textOnly?: boolean;
  /** Hard cap on conversation length, in seconds */
  maxDurationSeconds?: number;
  /** Event types forwarded to the client connection */
  clientEvents?: ClientEvent[];
  /** Allow real-time monitoring over WebSocket */
  monitoringEnabled?: boolean;
  /** Event types forwarded to monitoring connections */
  monitoringEvents?: ClientEvent[];
}
/**
 * Behavioural configuration for the agent itself.
 */
interface AgentConfig {
  /** Opening line spoken by the agent; empty means the agent waits for the user */
  firstMessage?: string;
  /** Language code applied to both ASR and TTS */
  language?: string;
  /** When the language is Hindi, enable Hinglish mode */
  hinglishMode?: boolean;
  /** Placeholder values substituted into prompts at runtime */
  dynamicVariables?: DynamicVariablesConfig;
  /** Block user interruptions while the first message is playing */
  disableFirstMessageInterruptions?: boolean;
  /** System prompt and LLM settings */
  prompt?: PromptAgentApiModelOutput;
}
/**
 * Runtime values substituted for dynamic-variable placeholders in prompts
 * and messages.
 */
interface DynamicVariablesConfig {
  /**
   * Map of placeholder name to its value.
   * Typed `unknown` rather than `any` so consumers must narrow before use;
   * values are presumably strings/numbers/booleans — confirm against the API.
   */
  dynamicVariablePlaceholders?: Record<string, unknown>;
}
/**
 * Complete prompt and LLM configuration for an agent.
 */
interface PromptAgentApiModelOutput {
  /** System prompt text for the agent */
  prompt?: string;
  /** LLM model used to generate responses */
  llm?: Llm;
  /** Reasoning effort (only honoured by models that support it) */
  reasoningEffort?: LlmReasoningEffort;
  /** Max tokens for thinking; 0 disables thinking where supported */
  thinkingBudget?: number;
  /** Sampling temperature for the LLM */
  temperature?: number;
  /** Maximum tokens the LLM may predict (applied when > 0) */
  maxTokens?: number;
  /** IDs of tools the agent can call */
  toolIds?: string[];
  /** Built-in system tools (end call, transfers, …) */
  builtInTools?: BuiltInToolsOutput;
  /** MCP server IDs */
  mcpServerIds?: string[];
  /** Native MCP server IDs */
  nativeMcpServerIds?: string[];
  /** Knowledge bases available to the agent */
  knowledgeBase?: KnowledgeBaseLocator[];
  /** Custom LLM endpoint definition; used when `llm` is `Llm.CUSTOM_LLM` */
  customLlm?: CustomLlm;
  /** Strip the default personality lines from the system prompt */
  ignoreDefaultPersonality?: boolean;
  /** Retrieval-augmented-generation settings */
  rag?: RagConfig;
  /** Timezone used for the current time in the system prompt */
  timezone?: string;
  /**
   * Backup LLM cascading configuration.
   * Shape not modelled here; typed `unknown` (not `any`) so consumers must
   * narrow before use. TODO(review): replace with a concrete type.
   */
  backupLlmConfig?: unknown;
  /** Seconds to wait before cascading to the backup LLM (2-15) */
  cascadeTimeoutSeconds?: number;
  /**
   * Inline tool definitions.
   * @deprecated Use {@link toolIds} instead.
   */
  tools?: unknown[];
}
/**
 * LLM model options.
 *
 * String values are the model identifiers sent to the API. Select
 * CUSTOM_LLM to route requests to a self-hosted endpoint configured via
 * PromptAgentApiModelOutput.customLlm.
 */
enum Llm {
// --- OpenAI GPT family ---
GPT_4O_MINI = "gpt-4o-mini",
GPT_4O = "gpt-4o",
GPT_4 = "gpt-4",
GPT_4_TURBO = "gpt-4-turbo",
GPT_4_1 = "gpt-4.1",
GPT_4_1_MINI = "gpt-4.1-mini",
GPT_4_1_NANO = "gpt-4.1-nano",
GPT_5 = "gpt-5",
GPT_5_1 = "gpt-5.1",
GPT_5_2 = "gpt-5.2",
GPT_5_2_CHAT_LATEST = "gpt-5.2-chat-latest",
GPT_5_MINI = "gpt-5-mini",
GPT_5_NANO = "gpt-5-nano",
GPT_3_5_TURBO = "gpt-3.5-turbo",
// --- Google Gemini family ---
GEMINI_1_5_PRO = "gemini-1.5-pro",
GEMINI_1_5_FLASH = "gemini-1.5-flash",
GEMINI_2_0_FLASH = "gemini-2.0-flash",
GEMINI_2_0_FLASH_LITE = "gemini-2.0-flash-lite",
GEMINI_2_5_FLASH_LITE = "gemini-2.5-flash-lite",
GEMINI_2_5_FLASH = "gemini-2.5-flash",
GEMINI_3_PRO_PREVIEW = "gemini-3-pro-preview",
GEMINI_3_FLASH_PREVIEW = "gemini-3-flash-preview",
// --- Anthropic Claude family ---
CLAUDE_SONNET_4_5 = "claude-sonnet-4-5",
CLAUDE_SONNET_4 = "claude-sonnet-4",
CLAUDE_HAIKU_4_5 = "claude-haiku-4-5",
CLAUDE_3_7_SONNET = "claude-3-7-sonnet",
CLAUDE_3_5_SONNET = "claude-3-5-sonnet",
CLAUDE_3_5_SONNET_V1 = "claude-3-5-sonnet-v1",
CLAUDE_3_HAIKU = "claude-3-haiku",
// --- xAI ---
GROK_BETA = "grok-beta",
// Bring-your-own model; requires a CustomLlm definition.
CUSTOM_LLM = "custom-llm",
// Additional model variants available
}
/**
 * LLM reasoning effort levels.
 *
 * Only honoured by models that support configurable reasoning; see
 * PromptAgentApiModelOutput.reasoningEffort.
 */
enum LlmReasoningEffort {
LOW = "low",
MEDIUM = "medium",
HIGH = "high",
}
/**
 * Controls how the agent takes turns in the conversation.
 */
interface TurnConfig {
  /** Seconds to wait for a user reply before the agent re-engages */
  turnTimeout?: number;
  /** Seconds to wait for the user to start when the first message is empty */
  initialWaitTime?: number;
  /** Seconds since the user last spoke after which the call ends */
  silenceEndCallTimeout?: number;
  /** Feedback shown while waiting on a slow LLM response */
  softTimeoutConfig?: SoftTimeoutConfig;
  /** How eagerly the agent jumps in to respond */
  turnEagerness?: TurnEagerness;
}
/**
 * Feedback emitted when the LLM takes too long to answer.
 */
interface SoftTimeoutConfig {
  /** Seconds before the soft-timeout message is shown; -1 disables it */
  timeoutSeconds?: number;
  /** Static message used when the soft timeout fires */
  message?: string;
  /** Let the LLM generate the soft-timeout message instead of `message` */
  useLlmGeneratedMessage?: boolean;
}
/**
 * Agent's turn eagerness levels.
 *
 * Controls how readily the agent responds; see TurnConfig.turnEagerness.
 */
enum TurnEagerness {
// Waits longer before taking a turn
PATIENT = "patient",
// Default balance
NORMAL = "normal",
// Responds as soon as possible
EAGER = "eager",
}
/**
 * Text-to-speech settings for a conversational agent.
 */
interface TtsConversationalConfigOutput {
  /** ElevenLabs TTS model identifier */
  modelId?: TtsConversationalModel;
  /** Identifier of the voice used for speech output */
  voiceId?: string;
  /** Extra voices the agent may use */
  supportedVoices?: SupportedVoice[];
  /** Encoding/sample rate of the agent's audio output */
  agentOutputAudioFormat?: TtsOutputFormat;
  /** How aggressively to optimise streaming latency */
  optimizeStreamingLatency?: TtsOptimizeStreamingLatency;
  /** Stability of the generated speech */
  stability?: number;
  /** Playback speed of the generated speech */
  speed?: number;
  /** Similarity boost applied to the generated speech */
  similarityBoost?: number;
  /** Strategy for converting numbers into words */
  textNormalisationType?: TextNormalisationType;
  /** Pronunciation dictionaries applied to the output */
  pronunciationDictionaryLocators?: PronunciationDictionaryVersionLocator[];
}
/**
 * TTS models for conversational AI.
 *
 * Values are ElevenLabs model identifiers, passed as
 * TtsConversationalConfigOutput.modelId.
 */
enum TtsConversationalModel {
ELEVEN_TURBO_V2 = "eleven_turbo_v2",
ELEVEN_TURBO_V2_5 = "eleven_turbo_v2_5",
ELEVEN_FLASH_V2 = "eleven_flash_v2",
ELEVEN_FLASH_V2_5 = "eleven_flash_v2_5",
ELEVEN_MULTILINGUAL_V2 = "eleven_multilingual_v2",
}
/**
 * TTS output format options.
 *
 * PCM_* names encode the sample rate in Hz; ULAW_8000 is 8 kHz mu-law,
 * typically used in telephony. Mirrors AsrInputFormat — keep them in sync.
 */
enum TtsOutputFormat {
PCM_16000 = "pcm_16000",
PCM_22050 = "pcm_22050",
PCM_24000 = "pcm_24000",
PCM_44100 = "pcm_44100",
ULAW_8000 = "ulaw_8000",
}
/**
 * TTS streaming latency optimization levels (0 = none, 4 = maximum).
 *
 * NOTE(review): as a numeric enum this gets a runtime reverse mapping
 * (TtsOptimizeStreamingLatency[4] === "FOUR"), so Object.keys contains both
 * names and values — avoid iterating it.
 */
enum TtsOptimizeStreamingLatency {
ZERO = 0,
ONE = 1,
TWO = 2,
THREE = 3,
FOUR = 4,
}
/**
 * Text normalization types for TTS — how numbers are converted to words
 * before synthesis.
 *
 * NOTE(review): SYSTEM_PROMPT presumably delegates normalisation to the LLM
 * via the system prompt and ELEVENLABS uses the built-in normaliser —
 * confirm against the ElevenLabs docs. OFF disables normalisation.
 */
enum TextNormalisationType {
SYSTEM_PROMPT = "system_prompt",
ELEVENLABS = "elevenlabs",
OFF = "off",
}
/**
 * Speech-to-text (transcription) settings.
 */
interface AsrConversationalConfig {
  /** Requested transcription quality */
  quality?: AsrQuality;
  /** Which transcription backend to use */
  provider?: AsrProvider;
  /** Encoding/sample rate of the user's input audio */
  userInputAudioFormat?: AsrInputFormat;
  /** Words whose prediction probability should be boosted */
  keywords?: string[];
}
/**
 * ASR quality levels.
 *
 * Currently only "high" exists; modelled as a literal type so further
 * levels can be added to the union without a breaking change.
 */
type AsrQuality = "high";
/**
 * ASR provider options.
 *
 * Transcription backend selected via AsrConversationalConfig.provider.
 */
enum AsrProvider {
ELEVENLABS = "elevenlabs",
DEEPGRAM = "deepgram",
}
/**
 * ASR input audio formats.
 *
 * PCM_* names encode the sample rate in Hz; ULAW_8000 is 8 kHz mu-law
 * (telephony). Mirrors TtsOutputFormat — keep them in sync.
 */
enum AsrInputFormat {
PCM_16000 = "pcm_16000",
PCM_22050 = "pcm_22050",
PCM_24000 = "pcm_24000",
PCM_44100 = "pcm_44100",
ULAW_8000 = "ulaw_8000",
}
/**
 * Voice activity detection configuration.
 *
 * Currently has no configurable fields: Record<string, never> only accepts
 * the empty object, reserving this slot for future options.
 */
type VadConfig = Record<string, never>;
/**
 * Enablement and configuration of the built-in system tools.
 * Every entry is optional; omitted tools keep their defaults.
 */
interface BuiltInToolsOutput {
  /** Lets the agent end the call */
  endCall?: SystemToolConfigOutput;
  /** Detects the language the user is speaking */
  languageDetection?: SystemToolConfigOutput;
  /** Hands the conversation off to another agent */
  transferToAgent?: SystemToolConfigOutput;
  /** Transfers the call to a phone number */
  transferToNumber?: SystemToolConfigOutput;
  /** Lets the agent skip its turn */
  skipTurn?: SystemToolConfigOutput;
  /** Plays DTMF keypad tones */
  playKeypadTouchTone?: SystemToolConfigOutput;
  /** Detects when the call has reached voicemail */
  voicemailDetection?: SystemToolConfigOutput;
}
/**
 * Configuration for a single built-in system tool.
 */
interface SystemToolConfigOutput {
  /** Whether the tool is enabled */
  enabled?: boolean;
  /**
   * Tool-specific configuration.
   * Typed `unknown` rather than `any` so consumers must narrow before use;
   * the shape depends on the tool this entry configures.
   */
  config?: Record<string, unknown>;
}
/**
 * Reference to a knowledge base attached to an agent.
 */
interface KnowledgeBaseLocator {
  /** Kind of document (file, url, text, folder) */
  type: KnowledgeBaseDocumentType;
  /** Human-readable name of the knowledge base */
  name: string;
  /** Unique identifier of the knowledge base */
  id: string;
  /** How the knowledge base is used during conversations */
  usageMode?: DocumentUsageModeEnum;
}
/**
 * Knowledge base document types.
 *
 * Source kind of a knowledge-base entry; see KnowledgeBaseLocator.type.
 */
enum KnowledgeBaseDocumentType {
FILE = "file",
URL = "url",
TEXT = "text",
FOLDER = "folder",
}
/**
 * Document usage modes for knowledge base.
 *
 * ALWAYS keeps the document available on every turn; WHEN_MENTIONED
 * presumably includes it only when the document is referenced — confirm
 * against the ElevenLabs RAG documentation.
 */
enum DocumentUsageModeEnum {
ALWAYS = "always",
WHEN_MENTIONED = "when_mentioned",
}
/**
 * Configuration for routing agent requests to a self-hosted LLM.
 * Used when the agent's `llm` is `Llm.CUSTOM_LLM`.
 */
interface CustomLlm {
  /** URL of a Chat Completions-compatible endpoint */
  url: string;
  /** Model ID, needed when the endpoint serves multiple models */
  modelId?: string;
  /** API key used to authenticate against the endpoint */
  apiKey?: ConvAiSecretLocator;
  /**
   * Extra headers sent with each request.
   * Typed `unknown` rather than `any` so consumers must narrow before use;
   * values are presumably strings or secret locators — confirm against the API.
   */
  requestHeaders?: Record<string, unknown>;
  /** API version to use */
  apiVersion?: string;
  /** Endpoint flavour: chat_completions or responses */
  apiType?: CustomLlmApiType;
}
/**
 * Custom LLM API types.
 *
 * Which endpoint flavour the custom LLM URL implements; see
 * CustomLlm.apiType.
 */
enum CustomLlmApiType {
CHAT_COMPLETIONS = "chat_completions",
RESPONSES = "responses",
}
/**
 * Retrieval-augmented-generation settings for the agent.
 */
interface RagConfig {
  /** Master switch for RAG */
  enabled?: boolean;
  /** Model used to embed queries and documents */
  embeddingModel?: EmbeddingModelEnum;
  /** Chunks beyond this vector distance are not retrieved */
  maxVectorDistance?: number;
  /** Cap on the combined length of retrieved chunks */
  maxDocumentsLength?: number;
  /** Cap on how many chunks are retrieved initially */
  maxRetrievedRagChunksCount?: number;
  /** Overrides the prompt used to rewrite queries before retrieval */
  queryRewritePromptOverride?: string;
}
/**
 * Embedding models for RAG.
 *
 * Values are OpenAI embedding-model identifiers; see
 * RagConfig.embeddingModel.
 */
enum EmbeddingModelEnum {
TEXT_EMBEDDING_3_SMALL = "text-embedding-3-small",
TEXT_EMBEDDING_3_LARGE = "text-embedding-3-large",
TEXT_EMBEDDING_ADA_002 = "text-embedding-ada-002",
}
/**
 * An additional voice the agent may use, with optional per-voice overrides.
 */
interface SupportedVoice {
  /** Display label for this voice */
  label: string;
  /** ElevenLabs voice identifier */
  voiceId: string;
  /** Free-form description of the voice */
  description?: string;
  /** Language this voice is intended for */
  language?: string;
  /** TTS model family rendering this voice */
  modelFamily?: TtsModelFamily;
  /** Per-voice streaming-latency optimisation override */
  optimizeStreamingLatency?: TtsOptimizeStreamingLatency;
  /** Per-voice stability override */
  stability?: number;
  /** Per-voice speed override */
  speed?: number;
  /** Per-voice similarity-boost override */
  similarityBoost?: number;
}
/**
 * TTS model families.
 *
 * Coarse grouping of the models in TtsConversationalModel; see
 * SupportedVoice.modelFamily.
 */
enum TtsModelFamily {
TURBO = "turbo",
FLASH = "flash",
MULTILINGUAL = "multilingual",
}
/**
 * Reference to a stored secret, used instead of embedding sensitive values
 * (such as API keys) directly in configuration.
 */
interface ConvAiSecretLocator {
  /** Identifier of the stored secret */
  secretId: string;
}
/**
 * Per-language configuration overrides.
 */
interface LanguagePresetOutput {
  /** Language code this preset applies to */
  language: string;
  /** TTS overrides for this language */
  tts?: TtsConversationalConfigOutput;
  /** ASR overrides for this language */
  asr?: AsrConversationalConfig;
  /** Agent-behaviour overrides for this language */
  agent?: AgentConfig;
}