The official Python SDK for the Deepgram automated speech recognition platform.
—
Real-time conversational AI capabilities enabling voice-based interactions with intelligent agents. The Agent module supports function calling, dynamic prompt updates, bidirectional audio streaming, and sophisticated conversation management for building interactive voice applications.
Real-time WebSocket clients for conversational AI interactions with full duplex audio streaming and message handling.
class AgentWebSocketClient:
    """Synchronous WebSocket client for conversational AI interactions."""

    def start(self, options: SettingsOptions) -> bool:
        """
        Start WebSocket connection for agent interaction.

        Args:
            options: Agent configuration settings

        Returns:
            bool: True if connection started successfully
        """

    def send_settings(self, settings: SettingsOptions) -> bool:
        """
        Update agent settings during conversation.

        Args:
            settings: New agent configuration

        Returns:
            bool: True if settings sent successfully
        """

    def update_prompt(self, options: UpdatePromptOptions) -> bool:
        """
        Update the agent's system prompt.

        Args:
            options: New prompt configuration

        Returns:
            bool: True if prompt updated successfully
        """

    def update_speak_options(self, options: UpdateSpeakOptions) -> bool:
        """
        Update the agent's speech synthesis settings.

        Args:
            options: New speak configuration

        Returns:
            bool: True if speak options updated successfully
        """

    def inject_agent_message(self, options: InjectAgentMessageOptions) -> bool:
        """
        Inject a message as if spoken by the agent.

        Args:
            options: Message injection configuration

        Returns:
            bool: True if message injected successfully
        """

    def inject_user_message(self, options: InjectUserMessageOptions) -> bool:
        """
        Inject a message as if spoken by the user.

        Args:
            options: Message injection configuration

        Returns:
            bool: True if message injected successfully
        """

    def send_function_call_response(self, response: FunctionCallResponse) -> bool:
        """
        Send response to agent function call request.

        Args:
            response: Function call result

        Returns:
            bool: True if response sent successfully
        """

    def keep_alive(self) -> bool:
        """
        Send keep-alive message to maintain connection.

        Returns:
            bool: True if keep-alive sent successfully
        """

    def send_audio(self, audio_data: bytes) -> bool:
        """
        Send audio data to the agent.

        Args:
            audio_data: Raw audio bytes

        Returns:
            bool: True if audio sent successfully
        """

    def close(self) -> bool:
        """
        Close WebSocket connection.

        Returns:
            bool: True if connection closed successfully
        """
class AsyncAgentWebSocketClient:
    """Asynchronous counterpart of AgentWebSocketClient."""

    # All methods are async versions of AgentWebSocketClient methods
    async def start(self, options: SettingsOptions) -> bool: ...
    async def send_settings(self, settings: SettingsOptions) -> bool: ...
    async def update_prompt(self, options: UpdatePromptOptions) -> bool: ...
# ... (all other methods with async keyword)

Access conversational AI clients through the main client's agent router.
class AgentRouter:
    """Router exposing the synchronous and asynchronous agent WebSocket clients."""

    @property
    def websocket(self) -> AgentWebSocketClient: ...

    @property
    def asyncwebsocket(self) -> AsyncAgentWebSocketClient: ...


class SettingsOptions:
    """Agent configuration settings accepted by ``start`` and ``send_settings``."""

    def __init__(self, **kwargs): ...

    agent: Agent  # Agent configuration
    listen: Listen = None  # Speech-to-text settings
    speak: Speak = None  # Text-to-speech settings
    think: Think = None  # Thinking/processing settings
class UpdatePromptOptions:
    """Options for ``update_prompt``: the new system prompt."""

    def __init__(self, **kwargs): ...

    prompt: str  # New system prompt text
class UpdateSpeakOptions:
    """Options for ``update_speak_options``: the new speech synthesis settings."""

    def __init__(self, **kwargs): ...

    speak: Speak  # New speech synthesis settings
class InjectAgentMessageOptions:
    """Options for ``inject_agent_message``: text injected as if spoken by the agent."""

    def __init__(self, **kwargs): ...

    text: str  # Message text to inject
class InjectUserMessageOptions:
    """Options for ``inject_user_message``: text injected as if spoken by the user."""

    def __init__(self, **kwargs): ...

    text: str  # User message text to inject
class FunctionCallResponse:
    """Result of a function call, sent back with ``send_function_call_response``."""

    def __init__(self, **kwargs): ...

    name: str  # Function name
    result: str  # Function execution result
class AgentKeepAlive:
    """Keep-alive message."""

    def __init__(self, **kwargs): ...

    type: str = "KeepAlive"  # Message type


class Agent:
    """Agent configuration grouping listening, thinking and speaking settings."""

    def __init__(self, **kwargs): ...

    listen: Listen  # Listening configuration
    think: Think  # Thinking configuration
    speak: Speak  # Speaking configuration
class Listen:
    """Speech-to-text (listening) configuration."""

    def __init__(self, **kwargs): ...

    model: str = "nova-2"  # STT model
    language: str = "en-US"  # Language code
    smart_format: bool = True  # Smart formatting
    encoding: str = "linear16"  # Audio encoding
    sample_rate: int = 16000  # Sample rate
    channels: int = 1  # Audio channels
    interim_results: bool = True  # Interim results
    vad_events: bool = True  # Voice activity detection
    endpointing: bool = True  # Endpoint detection
class Speak:
    """Text-to-speech (speaking) configuration."""

    def __init__(self, **kwargs): ...

    model: str = "aura-asteria-en"  # TTS model
    encoding: str = "linear16"  # Audio encoding
    sample_rate: int = 24000  # Sample rate
    container: str = "none"  # Audio container
class Think:
    """Thinking/processing configuration: provider, model, instructions, functions."""

    def __init__(self, **kwargs): ...

    provider: Provider  # AI provider configuration
    model: str = "gpt-4"  # Language model
    instructions: str = ""  # System instructions
    functions: list[Function] = None  # Available functions
class Provider:
    """AI provider configuration."""

    def __init__(self, **kwargs): ...

    type: str = "open_ai"  # Provider type
```
#### Function Configuration
```python { .api }
class Function:
    """Definition of a function made available to the agent."""

    def __init__(self, **kwargs): ...

    name: str  # Function name
    description: str  # Function description
    parameters: Parameters  # Function parameters schema
class Parameters:
    """Parameter schema of a function."""

    def __init__(self, **kwargs): ...

    type: str = "object"  # Parameters type
    properties: Properties  # Parameter properties
    required: list[str] = None  # Required parameters
class Properties:
    """Container for the per-parameter properties of a function."""

    def __init__(self, **kwargs): ...

    # Dynamic properties based on function parameters
class Header:
    """A name/value header pair."""

    def __init__(self, **kwargs): ...

    name: str  # Header name
    value: str  # Header value
class Item:
    def __init__(self, **kwargs): ...

    # Generic item configuration


class Input:
    def __init__(self, **kwargs): ...

    # Input configuration


class Output:
    def __init__(self, **kwargs): ...

    # Output configuration


class Audio:
    def __init__(self, **kwargs): ...

    # Audio configuration
class Endpoint:
    def __init__(self, **kwargs): ...

    # Endpoint configuration


class WelcomeResponse:
    """Initial connection welcome message"""

    type: str = "Welcome"
    message: str
class SettingsAppliedResponse:
    """Settings update confirmation"""

    type: str = "SettingsApplied"
    settings: dict
class ConversationTextResponse:
    """Conversation text event"""

    type: str = "ConversationText"
    text: str
    role: str  # "user" or "assistant"
class UserStartedSpeakingResponse:
    """User speech detection event"""

    type: str = "UserStartedSpeaking"
    timestamp: str
class AgentThinkingResponse:
    """Agent processing indication"""

    type: str = "AgentThinking"
class FunctionCall:
    """Function call data"""

    name: str
    arguments: dict
class FunctionCallRequest:
    """Function call request from agent"""

    type: str = "FunctionCallRequest"
    function_call: FunctionCall
    call_id: str
class AgentStartedSpeakingResponse:
    """Agent speech start event"""

    type: str = "AgentStartedSpeaking"
    timestamp: str
class AgentAudioDoneResponse:
    """Agent finished speaking event"""

    type: str = "AgentAudioDone"
class InjectionRefusedResponse:
    """Message injection refusal"""

    type: str = "InjectionRefused"
    message: str
# Common WebSocket responses are inherited:
# OpenResponse, CloseResponse, ErrorResponse, UnhandledResponse


class AgentWebSocketEvents:
    """WebSocket event types for conversational AI"""

    # Server Events (received from agent)
    Open: str = "Open"
    Close: str = "Close"
    AudioData: str = "AudioData"
    Welcome: str = "Welcome"
    SettingsApplied: str = "SettingsApplied"
    ConversationText: str = "ConversationText"
    UserStartedSpeaking: str = "UserStartedSpeaking"
    AgentThinking: str = "AgentThinking"
    FunctionCallRequest: str = "FunctionCallRequest"
    AgentStartedSpeaking: str = "AgentStartedSpeaking"
    AgentAudioDone: str = "AgentAudioDone"
    # InjectionRefused is a refusal the client handles (it has a response
    # type and a registered handler), so it is listed with the server events.
    InjectionRefused: str = "InjectionRefused"
    Error: str = "Error"
    Unhandled: str = "Unhandled"

    # Client Events (sent to agent)
    Settings: str = "Settings"
    UpdatePrompt: str = "UpdatePrompt"
    UpdateSpeak: str = "UpdateSpeak"
    InjectAgentMessage: str = "InjectAgentMessage"
    InjectUserMessage: str = "InjectUserMessage"
    AgentKeepAlive: str = "KeepAlive"


from deepgram import DeepgramClient, SettingsOptions, Agent, Listen, Speak, Think, Provider, AgentWebSocketEvents
import threading
client = DeepgramClient(api_key="your-api-key")
def on_open(self, open_event, **kwargs):
    """Report that the agent connection opened."""
    print("Agent connection opened")


def on_welcome(self, welcome, **kwargs):
    """Print the welcome message received from the agent."""
    print(f"Agent welcome: {welcome.message}")


def on_conversation_text(self, text_event, **kwargs):
    """Print a conversation line prefixed with the speaker's role."""
    print(f"{text_event.role}: {text_event.text}")


def on_user_started_speaking(self, event, **kwargs):
    """Report that the user started speaking."""
    print("User started speaking")


def on_agent_thinking(self, event, **kwargs):
    """Report that the agent is processing."""
    print("Agent is thinking...")


def on_agent_started_speaking(self, event, **kwargs):
    """Report that the agent started speaking."""
    print("Agent started speaking")


def on_agent_audio_done(self, event, **kwargs):
    """Report that the agent finished speaking."""
    print("Agent finished speaking")


def on_audio_data(self, audio_data, **kwargs):
    """Report the size of a received chunk of agent audio."""
    # Handle agent's speech audio
    # In a real application, you'd play this audio
    print(f"Received {len(audio_data)} bytes of audio")


def on_error(self, error, **kwargs):
    """Print an error reported on the agent connection."""
    print(f"Agent error: {error}")
# Configure agent settings
agent_settings = SettingsOptions(
    agent=Agent(
        listen=Listen(
            model="nova-2",
            language="en-US",
            smart_format=True,
            encoding="linear16",
            sample_rate=16000,
            interim_results=True,
            vad_events=True,
        ),
        think=Think(
            provider=Provider(type="open_ai"),
            model="gpt-4",
            instructions="You are a helpful AI assistant. Be conversational and friendly.",
        ),
        speak=Speak(
            model="aura-asteria-en",
            encoding="linear16",
            sample_rate=24000,
        ),
    )
)
# Create connection
dg_connection = client.agent.websocket.v("1")

# Set up event handlers
dg_connection.on(AgentWebSocketEvents.Open, on_open)
dg_connection.on(AgentWebSocketEvents.Welcome, on_welcome)
dg_connection.on(AgentWebSocketEvents.ConversationText, on_conversation_text)
dg_connection.on(AgentWebSocketEvents.UserStartedSpeaking, on_user_started_speaking)
dg_connection.on(AgentWebSocketEvents.AgentThinking, on_agent_thinking)
dg_connection.on(AgentWebSocketEvents.AgentStartedSpeaking, on_agent_started_speaking)
dg_connection.on(AgentWebSocketEvents.AgentAudioDone, on_agent_audio_done)
dg_connection.on(AgentWebSocketEvents.AudioData, on_audio_data)
dg_connection.on(AgentWebSocketEvents.Error, on_error)

# Start connection
if dg_connection.start(agent_settings):
    print("Agent connection started")

    # Send audio data (typically from microphone)
    # audio_data = get_microphone_data()
    # dg_connection.send_audio(audio_data)

    # Keep connection alive
    # dg_connection.keep_alive()

    # Close when done
    dg_connection.close()


from deepgram import (
    DeepgramClient, SettingsOptions, Agent, Think, Provider, Function,
    Parameters, Properties, FunctionCallResponse, AgentWebSocketEvents
)
import json
client = DeepgramClient(api_key="your-api-key")
def on_function_call_request(self, request, **kwargs):
    """Handle function call requests from the agent.

    Runs the local function named by ``request.function_call.name`` with the
    supplied arguments and sends the JSON-encoded result back over
    ``dg_connection``. A request naming a function this handler does not know
    is answered with an error payload instead of being dropped silently.
    """
    call = request.function_call
    print(f"Function call: {call.name}")
    print(f"Arguments: {call.arguments}")

    # Execute the function based on name
    if call.name == "get_weather":
        location = call.arguments.get("location")
        payload = get_weather(location)  # Your weather function
    elif call.name == "set_reminder":
        reminder = call.arguments.get("reminder")
        reminder_time = call.arguments.get("time")
        succeeded = set_reminder(reminder, reminder_time)  # Your reminder function
        payload = {"success": succeeded}
    else:
        payload = {"error": f"Unknown function: {call.name}"}

    # Send response back to agent
    response = FunctionCallResponse(
        name=call.name,
        result=json.dumps(payload),
    )
    dg_connection.send_function_call_response(response)
def get_weather(location):
    """Mock weather function.

    Returns a fixed weather report for ``location``; only the ``location``
    field of the result depends on the argument.
    """
    return {
        "location": location,
        "temperature": 72,
        "condition": "sunny",
        "humidity": 45,
    }
def set_reminder(reminder, time):
    """Mock reminder function.

    Prints the reminder text and its time, and always returns True.
    """
    print(f"Setting reminder: {reminder} at {time}")
    return True
# Define available functions
weather_function = Function(
    name="get_weather",
    description="Get current weather information for a location",
    parameters=Parameters(
        type="object",
        properties={
            "location": {"type": "string", "description": "City name or location"},
        },
        required=["location"],
    ),
)

reminder_function = Function(
    name="set_reminder",
    description="Set a reminder for the user",
    parameters=Parameters(
        type="object",
        properties={
            "reminder": {"type": "string", "description": "Reminder text"},
            "time": {"type": "string", "description": "Time for the reminder"},
        },
        required=["reminder", "time"],
    ),
)
# Configure agent with functions
agent_settings = SettingsOptions(
    agent=Agent(
        think=Think(
            provider=Provider(type="open_ai"),
            model="gpt-4",
            instructions="You are a helpful assistant with access to weather and reminder functions. Use them when appropriate.",
            functions=[weather_function, reminder_function],
        )
        # ... other agent configuration
    )
)
dg_connection = client.agent.websocket.v("1")
dg_connection.on(AgentWebSocketEvents.FunctionCallRequest, on_function_call_request)

if dg_connection.start(agent_settings):
    # Agent can now call functions during conversation
    pass


from deepgram import (
    DeepgramClient, UpdatePromptOptions, UpdateSpeakOptions,
    InjectAgentMessageOptions, InjectUserMessageOptions, Speak
)
client = DeepgramClient(api_key="your-api-key")
dg_connection = client.agent.websocket.v("1")

# Start with initial settings
# NOTE: `initial_settings` is not defined in this snippet; build a
# SettingsOptions instance before running it.
if dg_connection.start(initial_settings):
    # Update the agent's personality/instructions
    new_prompt = UpdatePromptOptions(
        prompt="You are now a cheerful children's storyteller. Use simple language and be very enthusiastic."
    )
    dg_connection.update_prompt(new_prompt)

    # Change the voice model
    new_speak_options = UpdateSpeakOptions(
        speak=Speak(
            model="aura-luna-en",  # Different voice
            encoding="linear16",
            sample_rate=24000,
        )
    )
    dg_connection.update_speak_options(new_speak_options)

    # Inject context into the conversation
    agent_message = InjectAgentMessageOptions(
        text="I just switched to storytelling mode! What kind of story would you like to hear?"
    )
    dg_connection.inject_agent_message(agent_message)

    # Inject user context
    user_message = InjectUserMessageOptions(
        text="The user mentioned they like adventure stories about pirates."
    )
    dg_connection.inject_user_message(user_message)


from deepgram import DeepgramClient, SettingsOptions, Agent, Think, Provider
import asyncio
async def create_agent(client, agent_id, instructions):
    """Create and configure an agent.

    Args:
        client: Client whose ``agent.asyncwebsocket`` router supplies the connection.
        agent_id: Label prefixed to the agent's instructions.
        instructions: Instructions text for this agent.

    Returns:
        The connection, after ``start`` has been awaited on it.
    """
    settings = SettingsOptions(
        agent=Agent(
            think=Think(
                provider=Provider(type="open_ai"),
                model="gpt-4",
                instructions=f"Agent {agent_id}: {instructions}",
            )
            # ... other configuration
        )
    )
    connection = client.agent.asyncwebsocket.v("1")
    await connection.start(settings)
    return connection
async def multi_agent_example():
    """Create three agents with different roles, then close their connections.

    Connections are closed in a ``finally`` block so that agents already
    started are not leaked if a later step raises.
    """
    client = DeepgramClient(api_key="your-api-key")
    connections = []
    try:
        # Create multiple agents with different roles
        moderator = await create_agent(
            client, "Moderator",
            "You are a meeting moderator. Keep discussions on track and summarize key points."
        )
        connections.append(moderator)
        expert1 = await create_agent(
            client, "Expert1",
            "You are a technical expert. Provide detailed technical insights."
        )
        connections.append(expert1)
        expert2 = await create_agent(
            client, "Expert2",
            "You are a business expert. Focus on practical business implications."
        )
        connections.append(expert2)

        # Coordinate conversation between agents
        # This would involve managing turn-taking and message passing
        # between the different agent connections
    finally:
        # Close connections when done
        for connection in connections:
            await connection.close()
# Run multi-agent example
asyncio.run(multi_agent_example())


from deepgram import DeepgramClient, DeepgramApiError, SettingsOptions, AgentWebSocketEvents
client = DeepgramClient(api_key="your-api-key")
def on_error(self, error, **kwargs):
    """Handle various error types.

    Prints the error, then a follow-up line chosen by whether its text
    mentions "connection" or "authentication" (case-insensitive).
    """
    print(f"Agent error: {error}")

    # Lower-case once so both keyword checks are case-insensitive
    message = str(error).lower()

    # Implement error-specific recovery logic
    if "connection" in message:
        print("Connection error - attempting to reconnect...")
        # Implement reconnection logic
    elif "authentication" in message:
        print("Authentication error - check API key")
    else:
        print("Unknown error - logging for investigation")
def on_injection_refused(self, refusal, **kwargs):
    """Handle message injection refusals by printing the refusal message."""
    print(f"Message injection refused: {refusal.message}")
    # Implement fallback logic
try:
settings = SettingsOptions(
# ... agent configuration
)
dg_connection = client.agent.websocket.v("1")
dg_connection.on(AgentWebSocketEvents.Error, on_error)
dg_connection.on(AgentWebSocketEvents.InjectionRefused, on_injection_refused)
if dg_connection.start(settings):
# Connection successful
print("Agent started successfully")
# Implement connection health monitoring
# dg_connection.keep_alive() # Send periodically
else:
print("Failed to start agent connection")
except DeepgramApiError as e:
print(f"API Error: {e}")
except Exception as e:
print(f"Unexpected error: {e}")
finally:
if 'dg_connection' in locals():
dg_connection.close()

Install with Tessl CLI
npx tessl i tessl/pypi-deepgram-sdk