Tessl Tile for pypi/deepgram-sdk@4.8.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

audio-utilities.md conversational-ai.md index.md project-management.md speech-to-text.md text-analysis.md text-to-speech.md

docs/conversational-ai.md

0
# Conversational AI
1

2
Real-time conversational AI capabilities enabling voice-based interactions with intelligent agents. The Agent module supports function calling, dynamic prompt updates, bidirectional audio streaming, and sophisticated conversation management for building interactive voice applications.
3

4
## Capabilities
5

6
### Agent WebSocket Client
7

8
Real-time WebSocket clients for conversational AI interactions with full duplex audio streaming and message handling.
9

10
```python { .api }
11
class AgentWebSocketClient:
12
    def start(self, options: SettingsOptions) -> bool:
13
        """
14
        Start WebSocket connection for agent interaction.
15
        
16
        Args:
17
            options: Agent configuration settings
18
            
19
        Returns:
20
            bool: True if connection started successfully
21
        """
22
    
23
    def send_settings(self, settings: SettingsOptions) -> bool:
24
        """
25
        Update agent settings during conversation.
26
        
27
        Args:
28
            settings: New agent configuration
29
            
30
        Returns:
31
            bool: True if settings sent successfully
32
        """
33
    
34
    def update_prompt(self, options: UpdatePromptOptions) -> bool:
35
        """
36
        Update the agent's system prompt.
37
        
38
        Args:
39
            options: New prompt configuration
40
            
41
        Returns:
42
            bool: True if prompt updated successfully
43
        """
44
    
45
    def update_speak_options(self, options: UpdateSpeakOptions) -> bool:
46
        """
47
        Update the agent's speech synthesis settings.
48
        
49
        Args:
50
            options: New speak configuration
51
            
52
        Returns:
53
            bool: True if speak options updated successfully
54
        """
55
    
56
    def inject_agent_message(self, options: InjectAgentMessageOptions) -> bool:
57
        """
58
        Inject a message as if spoken by the agent.
59
        
60
        Args:
61
            options: Message injection configuration
62
            
63
        Returns:
64
            bool: True if message injected successfully
65
        """
66
    
67
    def inject_user_message(self, options: InjectUserMessageOptions) -> bool:
68
        """
69
        Inject a message as if spoken by the user.
70
        
71
        Args:
72
            options: Message injection configuration
73
            
74
        Returns:
75
            bool: True if message injected successfully
76
        """
77
    
78
    def send_function_call_response(self, response: FunctionCallResponse) -> bool:
79
        """
80
        Send response to agent function call request.
81
        
82
        Args:
83
            response: Function call result
84
            
85
        Returns:
86
            bool: True if response sent successfully
87
        """
88
    
89
    def keep_alive(self) -> bool:
90
        """
91
        Send keep-alive message to maintain connection.
92
        
93
        Returns:
94
            bool: True if keep-alive sent successfully
95
        """
96
    
97
    def send_audio(self, audio_data: bytes) -> bool:
98
        """
99
        Send audio data to the agent.
100
        
101
        Args:
102
            audio_data: Raw audio bytes
103
            
104
        Returns:
105
            bool: True if audio sent successfully
106
        """
107
    
108
    def close(self) -> bool:
109
        """
110
        Close WebSocket connection.
111
        
112
        Returns:
113
            bool: True if connection closed successfully
114
        """
115

116
class AsyncAgentWebSocketClient:
117
    # All methods are async versions of AgentWebSocketClient methods
118
    async def start(self, options: SettingsOptions) -> bool: ...
119
    async def send_settings(self, settings: SettingsOptions) -> bool: ...
120
    async def update_prompt(self, options: UpdatePromptOptions) -> bool: ...
121
    # ... (all other methods with async keyword)
122
```
123

124
### Router Access
125

126
Access conversational AI clients through the main client's agent router.
127

128
```python { .api }
129
class AgentRouter:
130
    @property
131
    def websocket(self) -> AgentWebSocketClient: ...
132
    @property
133
    def asyncwebsocket(self) -> AsyncAgentWebSocketClient: ...
134
```
135

136
### Options Classes
137

138
#### Top-level Configuration
139

140
```python { .api }
141
class SettingsOptions:
142
    def __init__(self, **kwargs): ...
143
    agent: Agent  # Agent configuration
144
    listen: Listen = None  # Speech-to-text settings
145
    speak: Speak = None  # Text-to-speech settings
146
    think: Think = None  # Thinking/processing settings
147

148
class UpdatePromptOptions:
149
    def __init__(self, **kwargs): ...
150
    prompt: str  # New system prompt text
151

152
class UpdateSpeakOptions:
153
    def __init__(self, **kwargs): ...
154
    speak: Speak  # New speech synthesis settings
155

156
class InjectAgentMessageOptions:
157
    def __init__(self, **kwargs): ...
158
    text: str  # Message text to inject
159

160
class InjectUserMessageOptions:
161
    def __init__(self, **kwargs): ...
162
    text: str  # User message text to inject
163

164
class FunctionCallResponse:
165
    def __init__(self, **kwargs): ...
166
    name: str  # Function name
167
    result: str  # Function execution result
168

169
class AgentKeepAlive:
170
    def __init__(self, **kwargs): ...
171
    type: str = "KeepAlive"  # Message type
172
```
173

174
#### Sub-level Configuration
175

176
```python { .api }
177
class Agent:
178
    def __init__(self, **kwargs): ...
179
    listen: Listen  # Listening configuration
180
    think: Think  # Thinking configuration
181
    speak: Speak  # Speaking configuration
182

183
class Listen:
184
    def __init__(self, **kwargs): ...
185
    model: str = "nova-2"  # STT model
186
    language: str = "en-US"  # Language code
187
    smart_format: bool = True  # Smart formatting
188
    encoding: str = "linear16"  # Audio encoding
189
    sample_rate: int = 16000  # Sample rate
190
    channels: int = 1  # Audio channels
191
    interim_results: bool = True  # Interim results
192
    vad_events: bool = True  # Voice activity detection
193
    endpointing: bool = True  # Endpoint detection
194

195
class Speak:
196
    def __init__(self, **kwargs): ...
197
    model: str = "aura-asteria-en"  # TTS model
198
    encoding: str = "linear16"  # Audio encoding
199
    sample_rate: int = 24000  # Sample rate
200
    container: str = "none"  # Audio container
201

202
class Think:
203
    def __init__(self, **kwargs): ...
204
    provider: Provider  # AI provider configuration
205
    model: str = "gpt-4"  # Language model
206
    instructions: str = ""  # System instructions
207
    functions: list[Function] = None  # Available functions
208

209
class Provider:
210
    def __init__(self, **kwargs): ...
211
    type: str = "open_ai"  # Provider type
212
```
213

214
#### Function Configuration
215

216
```python { .api }
217
class Function:
218
    def __init__(self, **kwargs): ...
219
    name: str  # Function name
220
    description: str  # Function description
221
    parameters: Parameters  # Function parameters schema
222

223
class Parameters:
224
    def __init__(self, **kwargs): ...
225
    type: str = "object"  # Parameters type
226
    properties: Properties  # Parameter properties
227
    required: list[str] = None  # Required parameters
228

229
class Properties:
230
    def __init__(self, **kwargs): ...
231
    # Dynamic properties based on function parameters
232

233
class Header:
234
    def __init__(self, **kwargs): ...
235
    name: str  # Header name
236
    value: str  # Header value
237

238
class Item:
239
    def __init__(self, **kwargs): ...
240
    # Generic item configuration
241

242
class Input:
243
    def __init__(self, **kwargs): ...
244
    # Input configuration
245

246
class Output:
247
    def __init__(self, **kwargs): ...
248
    # Output configuration
249

250
class Audio:
251
    def __init__(self, **kwargs): ...
252
    # Audio configuration
253

254
class Endpoint:
255
    def __init__(self, **kwargs): ...
256
    # Endpoint configuration
257
```
258

259
### Response Types
260

261
#### Agent-Specific Responses
262

263
```python { .api }
264
class WelcomeResponse:
265
    """Initial connection welcome message"""
266
    type: str = "Welcome"
267
    message: str
268

269
class SettingsAppliedResponse:
270
    """Settings update confirmation"""
271
    type: str = "SettingsApplied"
272
    settings: dict
273

274
class ConversationTextResponse:
275
    """Conversation text event"""
276
    type: str = "ConversationText"
277
    text: str
278
    role: str  # "user" or "assistant"
279

280
class UserStartedSpeakingResponse:
281
    """User speech detection event"""
282
    type: str = "UserStartedSpeaking"
283
    timestamp: str
284

285
class AgentThinkingResponse:
286
    """Agent processing indication"""
287
    type: str = "AgentThinking"
288

289
class FunctionCall:
290
    """Function call data"""
291
    name: str
292
    arguments: dict
293

294
class FunctionCallRequest:
295
    """Function call request from agent"""
296
    type: str = "FunctionCallRequest"
297
    function_call: FunctionCall
298
    call_id: str
299

300
class AgentStartedSpeakingResponse:
301
    """Agent speech start event"""
302
    type: str = "AgentStartedSpeaking"
303
    timestamp: str
304

305
class AgentAudioDoneResponse:
306
    """Agent finished speaking event"""
307
    type: str = "AgentAudioDone"
308

309
class InjectionRefusedResponse:
310
    """Message injection refusal"""
311
    type: str = "InjectionRefused"
312
    message: str
313
    
314
# Common WebSocket responses are inherited:
315
# OpenResponse, CloseResponse, ErrorResponse, UnhandledResponse
316
```
317

318
### Events
319

320
```python { .api }
321
class AgentWebSocketEvents:
322
    """WebSocket event types for conversational AI"""
323
    
324
    # Server Events (received from agent)
325
    Open: str = "Open"
326
    Close: str = "Close"
327
    AudioData: str = "AudioData"
328
    Welcome: str = "Welcome"
329
    SettingsApplied: str = "SettingsApplied"
330
    ConversationText: str = "ConversationText"
331
    UserStartedSpeaking: str = "UserStartedSpeaking"
332
    AgentThinking: str = "AgentThinking"
333
    FunctionCallRequest: str = "FunctionCallRequest"
334
    AgentStartedSpeaking: str = "AgentStartedSpeaking"
335
    AgentAudioDone: str = "AgentAudioDone"
336
    Error: str = "Error"
337
    Unhandled: str = "Unhandled"
338
    
339
    # Client Events (sent to agent)
340
    Settings: str = "Settings"
341
    UpdatePrompt: str = "UpdatePrompt"
342
    UpdateSpeak: str = "UpdateSpeak"
343
    InjectAgentMessage: str = "InjectAgentMessage"
344
    InjectUserMessage: str = "InjectUserMessage"
345
    InjectionRefused: str = "InjectionRefused"  # NOTE: received from the agent (see InjectionRefusedResponse), not sent by the client
346
    AgentKeepAlive: str = "KeepAlive"
347
```
348

349
## Usage Examples
350

351
### Basic Conversational Agent
352

353
```python
354
from deepgram import DeepgramClient, SettingsOptions, Agent, Listen, Speak, Think, Provider, AgentWebSocketEvents
355
import threading
356

357
client = DeepgramClient(api_key="your-api-key")
358

359
def on_open(self, open_event, **kwargs):
360
    print("Agent connection opened")
361

362
def on_welcome(self, welcome, **kwargs):
363
    print(f"Agent welcome: {welcome.message}")
364

365
def on_conversation_text(self, text_event, **kwargs):
366
    print(f"{text_event.role}: {text_event.text}")
367

368
def on_user_started_speaking(self, event, **kwargs):
369
    print("User started speaking")
370

371
def on_agent_thinking(self, event, **kwargs):
372
    print("Agent is thinking...")
373

374
def on_agent_started_speaking(self, event, **kwargs):
375
    print("Agent started speaking")
376

377
def on_agent_audio_done(self, event, **kwargs):
378
    print("Agent finished speaking")
379

380
def on_audio_data(self, audio_data, **kwargs):
381
    # Handle agent's speech audio
382
    # In a real application, you'd play this audio
383
    print(f"Received {len(audio_data)} bytes of audio")
384

385
def on_error(self, error, **kwargs):
386
    print(f"Agent error: {error}")
387

388
# Configure agent settings
389
agent_settings = SettingsOptions(
390
    agent=Agent(
391
        listen=Listen(
392
            model="nova-2",
393
            language="en-US",
394
            smart_format=True,
395
            encoding="linear16",
396
            sample_rate=16000,
397
            interim_results=True,
398
            vad_events=True
399
        ),
400
        think=Think(
401
            provider=Provider(type="open_ai"),
402
            model="gpt-4",
403
            instructions="You are a helpful AI assistant. Be conversational and friendly."
404
        ),
405
        speak=Speak(
406
            model="aura-asteria-en",
407
            encoding="linear16",
408
            sample_rate=24000
409
        )
410
    )
411
)
412

413
# Create connection
414
dg_connection = client.agent.websocket.v("1")
415

416
# Set up event handlers
417
dg_connection.on(AgentWebSocketEvents.Open, on_open)
418
dg_connection.on(AgentWebSocketEvents.Welcome, on_welcome)
419
dg_connection.on(AgentWebSocketEvents.ConversationText, on_conversation_text)
420
dg_connection.on(AgentWebSocketEvents.UserStartedSpeaking, on_user_started_speaking)
421
dg_connection.on(AgentWebSocketEvents.AgentThinking, on_agent_thinking)
422
dg_connection.on(AgentWebSocketEvents.AgentStartedSpeaking, on_agent_started_speaking)
423
dg_connection.on(AgentWebSocketEvents.AgentAudioDone, on_agent_audio_done)
424
dg_connection.on(AgentWebSocketEvents.AudioData, on_audio_data)
425
dg_connection.on(AgentWebSocketEvents.Error, on_error)
426

427
# Start connection
428
if dg_connection.start(agent_settings):
429
    print("Agent connection started")
430
    
431
    # Send audio data (typically from microphone)
432
    # audio_data = get_microphone_data()
433
    # dg_connection.send_audio(audio_data)
434
    
435
    # Keep connection alive
436
    # dg_connection.keep_alive()
437
    
438
    # Close when done
439
    dg_connection.close()
440
```
441

442
### Agent with Function Calling
443

444
```python
445
from deepgram import (
446
    DeepgramClient, SettingsOptions, Agent, Think, Provider, Function, 
447
    Parameters, Properties, FunctionCallResponse, AgentWebSocketEvents
448
)
449
import json
450

451
client = DeepgramClient(api_key="your-api-key")
452

453
def on_function_call_request(self, request, **kwargs):
454
    """Handle function call requests from the agent"""
455
    print(f"Function call: {request.function_call.name}")
456
    print(f"Arguments: {request.function_call.arguments}")
457
    
458
    # Execute the function based on name
459
    if request.function_call.name == "get_weather":
460
        location = request.function_call.arguments.get("location")
461
        weather_data = get_weather(location)  # Your weather function
462
        
463
        # Send response back to agent
464
        response = FunctionCallResponse(
465
            name=request.function_call.name,
466
            result=json.dumps(weather_data)
467
        )
468
        dg_connection.send_function_call_response(response)
469
    
470
    elif request.function_call.name == "set_reminder":
471
        reminder = request.function_call.arguments.get("reminder")
472
        time = request.function_call.arguments.get("time")
473
        result = set_reminder(reminder, time)  # Your reminder function
474
        
475
        response = FunctionCallResponse(
476
            name=request.function_call.name,
477
            result=json.dumps({"success": result})
478
        )
479
        dg_connection.send_function_call_response(response)
480

481
def get_weather(location):
482
    """Mock weather function"""
483
    return {
484
        "location": location,
485
        "temperature": 72,
486
        "condition": "sunny",
487
        "humidity": 45
488
    }
489

490
def set_reminder(reminder, time):
491
    """Mock reminder function"""
492
    print(f"Setting reminder: {reminder} at {time}")
493
    return True
494

495
# Define available functions
496
weather_function = Function(
497
    name="get_weather",
498
    description="Get current weather information for a location",
499
    parameters=Parameters(
500
        type="object",
501
        properties={
502
            "location": {"type": "string", "description": "City name or location"}
503
        },
504
        required=["location"]
505
    )
506
)
507

508
reminder_function = Function(
509
    name="set_reminder",
510
    description="Set a reminder for the user",
511
    parameters=Parameters(
512
        type="object",
513
        properties={
514
            "reminder": {"type": "string", "description": "Reminder text"},
515
            "time": {"type": "string", "description": "Time for the reminder"}
516
        },
517
        required=["reminder", "time"]
518
    )
519
)
520

521
# Configure agent with functions
522
agent_settings = SettingsOptions(
523
    agent=Agent(
524
        think=Think(
525
            provider=Provider(type="open_ai"),
526
            model="gpt-4",
527
            instructions="You are a helpful assistant with access to weather and reminder functions. Use them when appropriate.",
528
            functions=[weather_function, reminder_function]
529
        )
530
        # ... other agent configuration
531
    )
532
)
533

534
dg_connection = client.agent.websocket.v("1")
535
dg_connection.on(AgentWebSocketEvents.FunctionCallRequest, on_function_call_request)
536

537
if dg_connection.start(agent_settings):
538
    # Agent can now call functions during conversation
539
    pass
540
```
541

542
### Dynamic Agent Updates
543

544
```python
545
from deepgram import (
546
    DeepgramClient, UpdatePromptOptions, UpdateSpeakOptions, 
547
    InjectAgentMessageOptions, InjectUserMessageOptions, Speak
548
)
549

550
client = DeepgramClient(api_key="your-api-key")
551
dg_connection = client.agent.websocket.v("1")
552

553
# Start with initial settings (initial_settings is a SettingsOptions instance, built as in the earlier examples)
554
if dg_connection.start(initial_settings):
555
    
556
    # Update the agent's personality/instructions
557
    new_prompt = UpdatePromptOptions(
558
        prompt="You are now a cheerful children's storyteller. Use simple language and be very enthusiastic."
559
    )
560
    dg_connection.update_prompt(new_prompt)
561
    
562
    # Change the voice model
563
    new_speak_options = UpdateSpeakOptions(
564
        speak=Speak(
565
            model="aura-luna-en",  # Different voice
566
            encoding="linear16",
567
            sample_rate=24000
568
        )
569
    )
570
    dg_connection.update_speak_options(new_speak_options)
571
    
572
    # Inject context into the conversation
573
    agent_message = InjectAgentMessageOptions(
574
        text="I just switched to storytelling mode! What kind of story would you like to hear?"
575
    )
576
    dg_connection.inject_agent_message(agent_message)
577
    
578
    # Inject user context
579
    user_message = InjectUserMessageOptions(
580
        text="The user mentioned they like adventure stories about pirates."
581
    )
582
    dg_connection.inject_user_message(user_message)
583
```
584

585
### Multi-Agent Conversation
586

587
```python
588
from deepgram import DeepgramClient, SettingsOptions, Agent, Think, Provider
589
import asyncio
590

591
async def create_agent(client, agent_id, instructions):
592
    """Create and configure an agent"""
593
    settings = SettingsOptions(
594
        agent=Agent(
595
            think=Think(
596
                provider=Provider(type="open_ai"),
597
                model="gpt-4",
598
                instructions=f"Agent {agent_id}: {instructions}"
599
            )
600
            # ... other configuration
601
        )
602
    )
603
    
604
    connection = client.agent.asyncwebsocket.v("1")
605
    await connection.start(settings)
606
    return connection
607

608
async def multi_agent_example():
609
    client = DeepgramClient(api_key="your-api-key")
610
    
611
    # Create multiple agents with different roles
612
    moderator = await create_agent(
613
        client, "Moderator", 
614
        "You are a meeting moderator. Keep discussions on track and summarize key points."
615
    )
616
    
617
    expert1 = await create_agent(
618
        client, "Expert1",
619
        "You are a technical expert. Provide detailed technical insights."
620
    )
621
    
622
    expert2 = await create_agent(
623
        client, "Expert2", 
624
        "You are a business expert. Focus on practical business implications."
625
    )
626
    
627
    # Coordinate conversation between agents
628
    # This would involve managing turn-taking and message passing
629
    # between the different agent connections
630
    
631
    # Close connections when done
632
    await moderator.close()
633
    await expert1.close() 
634
    await expert2.close()
635

636
# Run multi-agent example
637
asyncio.run(multi_agent_example())
638
```
639

640
### Error Handling and Recovery
641

642
```python
643
from deepgram import DeepgramClient, DeepgramApiError, SettingsOptions, AgentWebSocketEvents
644

645
client = DeepgramClient(api_key="your-api-key")
646

647
def on_error(self, error, **kwargs):
648
    """Handle various error types"""
649
    print(f"Agent error: {error}")
650
    
651
    # Implement error-specific recovery logic
652
    if "connection" in str(error).lower():
653
        print("Connection error - attempting to reconnect...")
654
        # Implement reconnection logic
655
    elif "authentication" in str(error).lower():
656
        print("Authentication error - check API key")
657
    else:
658
        print("Unknown error - logging for investigation")
659

660
def on_injection_refused(self, refusal, **kwargs):
661
    """Handle message injection refusals"""
662
    print(f"Message injection refused: {refusal.message}")
663
    # Implement fallback logic
664

665
try:
666
    settings = SettingsOptions(
667
        # ... agent configuration
668
    )
669
    
670
    dg_connection = client.agent.websocket.v("1")
671
    dg_connection.on(AgentWebSocketEvents.Error, on_error)
672
    dg_connection.on(AgentWebSocketEvents.InjectionRefused, on_injection_refused)
673
    
674
    if dg_connection.start(settings):
675
        # Connection successful
676
        print("Agent started successfully")
677
        
678
        # Implement connection health monitoring
679
        # dg_connection.keep_alive()  # Send periodically
680
        
681
    else:
682
        print("Failed to start agent connection")
683
        
684
except DeepgramApiError as e:
685
    print(f"API Error: {e}")
686
except Exception as e:
687
    print(f"Unexpected error: {e}")
688
finally:
689
    if 'dg_connection' in locals():
690
        dg_connection.close()
691
```

Version

Tile

Files

docs/conversational-ai.md

docs/conversational-ai.md