or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

- audio-utilities.md
- conversational-ai.md
- index.md
- project-management.md
- speech-to-text.md
- text-analysis.md
- text-to-speech.md

docs/conversational-ai.md

0

# Conversational AI

1

2

Real-time conversational AI capabilities enabling voice-based interactions with intelligent agents. The Agent module supports function calling, dynamic prompt updates, bidirectional audio streaming, and sophisticated conversation management for building interactive voice applications.

3

4

## Capabilities

5

6

### Agent WebSocket Client

7

8

Real-time WebSocket clients for conversational AI interactions with full duplex audio streaming and message handling.

9

10

```python { .api }

11

class AgentWebSocketClient:

12

def start(self, options: SettingsOptions) -> bool:

13

"""

14

Start WebSocket connection for agent interaction.

15

16

Args:

17

options: Agent configuration settings

18

19

Returns:

20

bool: True if connection started successfully

21

"""

22

23

def send_settings(self, settings: SettingsOptions) -> bool:

24

"""

25

Update agent settings during conversation.

26

27

Args:

28

settings: New agent configuration

29

30

Returns:

31

bool: True if settings sent successfully

32

"""

33

34

def update_prompt(self, options: UpdatePromptOptions) -> bool:

35

"""

36

Update the agent's system prompt.

37

38

Args:

39

options: New prompt configuration

40

41

Returns:

42

bool: True if prompt updated successfully

43

"""

44

45

def update_speak_options(self, options: UpdateSpeakOptions) -> bool:

46

"""

47

Update the agent's speech synthesis settings.

48

49

Args:

50

options: New speak configuration

51

52

Returns:

53

bool: True if speak options updated successfully

54

"""

55

56

def inject_agent_message(self, options: InjectAgentMessageOptions) -> bool:

57

"""

58

Inject a message as if spoken by the agent.

59

60

Args:

61

options: Message injection configuration

62

63

Returns:

64

bool: True if message injected successfully

65

"""

66

67

def inject_user_message(self, options: InjectUserMessageOptions) -> bool:

68

"""

69

Inject a message as if spoken by the user.

70

71

Args:

72

options: Message injection configuration

73

74

Returns:

75

bool: True if message injected successfully

76

"""

77

78

def send_function_call_response(self, response: FunctionCallResponse) -> bool:

79

"""

80

Send response to agent function call request.

81

82

Args:

83

response: Function call result

84

85

Returns:

86

bool: True if response sent successfully

87

"""

88

89

def keep_alive(self) -> bool:

90

"""

91

Send keep-alive message to maintain connection.

92

93

Returns:

94

bool: True if keep-alive sent successfully

95

"""

96

97

def send_audio(self, audio_data: bytes) -> bool:

98

"""

99

Send audio data to the agent.

100

101

Args:

102

audio_data: Raw audio bytes

103

104

Returns:

105

bool: True if audio sent successfully

106

"""

107

108

def close(self) -> bool:

109

"""

110

Close WebSocket connection.

111

112

Returns:

113

bool: True if connection closed successfully

114

"""

115

116

class AsyncAgentWebSocketClient:

117

# All methods are async versions of AgentWebSocketClient methods

118

async def start(self, options: SettingsOptions) -> bool: ...

119

async def send_settings(self, settings: SettingsOptions) -> bool: ...

120

async def update_prompt(self, options: UpdatePromptOptions) -> bool: ...

121

# ... (all other methods with async keyword)

122

```

123

124

### Router Access

125

126

Access conversational AI clients through the main client's agent router.

127

128

```python { .api }

129

class AgentRouter:

130

@property

131

def websocket(self) -> AgentWebSocketClient: ...

132

@property

133

def asyncwebsocket(self) -> AsyncAgentWebSocketClient: ...

134

```

135

136

### Options Classes

137

138

#### Top-level Configuration

139

140

```python { .api }

141

class SettingsOptions:

142

def __init__(self, **kwargs): ...

143

agent: Agent # Agent configuration

144

listen: Listen = None # Speech-to-text settings

145

speak: Speak = None # Text-to-speech settings

146

think: Think = None # Thinking/processing settings

147

148

class UpdatePromptOptions:

149

def __init__(self, **kwargs): ...

150

prompt: str # New system prompt text

151

152

class UpdateSpeakOptions:

153

def __init__(self, **kwargs): ...

154

speak: Speak # New speech synthesis settings

155

156

class InjectAgentMessageOptions:

157

def __init__(self, **kwargs): ...

158

text: str # Message text to inject

159

160

class InjectUserMessageOptions:

161

def __init__(self, **kwargs): ...

162

text: str # User message text to inject

163

164

class FunctionCallResponse:

165

def __init__(self, **kwargs): ...

166

name: str # Function name

167

result: str # Function execution result

168

169

class AgentKeepAlive:

170

def __init__(self, **kwargs): ...

171

type: str = "KeepAlive" # Message type

172

```

173

174

#### Sub-level Configuration

175

176

```python { .api }

177

class Agent:

178

def __init__(self, **kwargs): ...

179

listen: Listen # Listening configuration

180

think: Think # Thinking configuration

181

speak: Speak # Speaking configuration

182

183

class Listen:

184

def __init__(self, **kwargs): ...

185

model: str = "nova-2" # STT model

186

language: str = "en-US" # Language code

187

smart_format: bool = True # Smart formatting

188

encoding: str = "linear16" # Audio encoding

189

sample_rate: int = 16000 # Sample rate

190

channels: int = 1 # Audio channels

191

interim_results: bool = True # Interim results

192

vad_events: bool = True # Voice activity detection

193

endpointing: bool = True # Endpoint detection

194

195

class Speak:

196

def __init__(self, **kwargs): ...

197

model: str = "aura-asteria-en" # TTS model

198

encoding: str = "linear16" # Audio encoding

199

sample_rate: int = 24000 # Sample rate

200

container: str = "none" # Audio container

201

202

class Think:

203

def __init__(self, **kwargs): ...

204

provider: Provider # AI provider configuration

205

model: str = "gpt-4" # Language model

206

instructions: str = "" # System instructions

207

functions: list[Function] = None # Available functions

208

209

class Provider:

210

def __init__(self, **kwargs): ...

211

type: str = "open_ai" # Provider type

212

```

213

214

#### Function Configuration

215

216

```python { .api }

217

class Function:

218

def __init__(self, **kwargs): ...

219

name: str # Function name

220

description: str # Function description

221

parameters: Parameters # Function parameters schema

222

223

class Parameters:

224

def __init__(self, **kwargs): ...

225

type: str = "object" # Parameters type

226

properties: Properties # Parameter properties

227

required: list[str] = None # Required parameters

228

229

class Properties:

230

def __init__(self, **kwargs): ...

231

# Dynamic properties based on function parameters

232

233

class Header:

234

def __init__(self, **kwargs): ...

235

name: str # Header name

236

value: str # Header value

237

238

class Item:

239

def __init__(self, **kwargs): ...

240

# Generic item configuration

241

242

class Input:

243

def __init__(self, **kwargs): ...

244

# Input configuration

245

246

class Output:

247

def __init__(self, **kwargs): ...

248

# Output configuration

249

250

class Audio:

251

def __init__(self, **kwargs): ...

252

# Audio configuration

253

254

class Endpoint:

255

def __init__(self, **kwargs): ...

256

# Endpoint configuration

257

```

258

259

### Response Types

260

261

#### Agent-Specific Responses

262

263

```python { .api }

264

class WelcomeResponse:

265

"""Initial connection welcome message"""

266

type: str = "Welcome"

267

message: str

268

269

class SettingsAppliedResponse:

270

"""Settings update confirmation"""

271

type: str = "SettingsApplied"

272

settings: dict

273

274

class ConversationTextResponse:

275

"""Conversation text event"""

276

type: str = "ConversationText"

277

text: str

278

role: str # "user" or "assistant"

279

280

class UserStartedSpeakingResponse:

281

"""User speech detection event"""

282

type: str = "UserStartedSpeaking"

283

timestamp: str

284

285

class AgentThinkingResponse:

286

"""Agent processing indication"""

287

type: str = "AgentThinking"

288

289

class FunctionCall:

290

"""Function call data"""

291

name: str

292

arguments: dict

293

294

class FunctionCallRequest:

295

"""Function call request from agent"""

296

type: str = "FunctionCallRequest"

297

function_call: FunctionCall

298

call_id: str

299

300

class AgentStartedSpeakingResponse:

301

"""Agent speech start event"""

302

type: str = "AgentStartedSpeaking"

303

timestamp: str

304

305

class AgentAudioDoneResponse:

306

"""Agent finished speaking event"""

307

type: str = "AgentAudioDone"

308

309

class InjectionRefusedResponse:

310

"""Message injection refusal"""

311

type: str = "InjectionRefused"

312

message: str

313

314

# Common WebSocket responses are inherited:

315

# OpenResponse, CloseResponse, ErrorResponse, UnhandledResponse

316

```

317

318

### Events

319

320

```python { .api }

321

class AgentWebSocketEvents:

322

"""WebSocket event types for conversational AI"""

323

324

# Server Events (received from agent)

325

Open: str = "Open"

326

Close: str = "Close"

327

AudioData: str = "AudioData"

328

Welcome: str = "Welcome"

329

SettingsApplied: str = "SettingsApplied"

330

ConversationText: str = "ConversationText"

331

UserStartedSpeaking: str = "UserStartedSpeaking"

332

AgentThinking: str = "AgentThinking"

333

FunctionCallRequest: str = "FunctionCallRequest"

334

AgentStartedSpeaking: str = "AgentStartedSpeaking"

335

AgentAudioDone: str = "AgentAudioDone"

336

Error: str = "Error"

337

Unhandled: str = "Unhandled"

338

339

# Client Events (sent to agent)

340

Settings: str = "Settings"

341

UpdatePrompt: str = "UpdatePrompt"

342

UpdateSpeak: str = "UpdateSpeak"

343

InjectAgentMessage: str = "InjectAgentMessage"

344

InjectUserMessage: str = "InjectUserMessage"

345

InjectionRefused: str = "InjectionRefused"

346

AgentKeepAlive: str = "KeepAlive"

347

```

348

349

## Usage Examples

350

351

### Basic Conversational Agent

352

353

```python

354

from deepgram import DeepgramClient, SettingsOptions, Agent, Listen, Speak, Think, Provider, AgentWebSocketEvents

355

import threading

356

357

client = DeepgramClient(api_key="your-api-key")

358

359

def on_open(self, open_event, **kwargs):
    """Report that the agent WebSocket connection has opened."""
    print("Agent connection opened")


def on_welcome(self, welcome, **kwargs):
    """Print the welcome message carried by the Welcome event."""
    print(f"Agent welcome: {welcome.message}")


def on_conversation_text(self, text_event, **kwargs):
    """Print one conversation turn as ``<role>: <text>``."""
    print(f"{text_event.role}: {text_event.text}")


def on_user_started_speaking(self, event, **kwargs):
    """Report that the user started speaking."""
    print("User started speaking")


def on_agent_thinking(self, event, **kwargs):
    """Report that the agent is processing."""
    print("Agent is thinking...")


def on_agent_started_speaking(self, event, **kwargs):
    """Report that the agent started speaking."""
    print("Agent started speaking")


def on_agent_audio_done(self, event, **kwargs):
    """Report that the agent finished speaking."""
    print("Agent finished speaking")


def on_audio_data(self, audio_data, **kwargs):
    """Handle a chunk of the agent's speech audio.

    In a real application you would play this audio; the example only
    reports how many bytes arrived.
    """
    print(f"Received {len(audio_data)} bytes of audio")


def on_error(self, error, **kwargs):
    """Print an error reported on the agent connection."""
    print(f"Agent error: {error}")

387

388

# Configure agent settings: speech-to-text (listen), the language model
# (think) and text-to-speech (speak) are all nested under `agent`.
agent_settings = SettingsOptions(
    agent=Agent(
        listen=Listen(
            model="nova-2",
            language="en-US",
            smart_format=True,
            encoding="linear16",
            sample_rate=16000,
            interim_results=True,
            vad_events=True,
        ),
        think=Think(
            provider=Provider(type="open_ai"),
            model="gpt-4",
            instructions="You are a helpful AI assistant. Be conversational and friendly.",
        ),
        speak=Speak(
            model="aura-asteria-en",
            encoding="linear16",
            sample_rate=24000,
        ),
    )
)

412

413

# Create connection
dg_connection = client.agent.websocket.v("1")

# Set up event handlers
dg_connection.on(AgentWebSocketEvents.Open, on_open)
dg_connection.on(AgentWebSocketEvents.Welcome, on_welcome)
dg_connection.on(AgentWebSocketEvents.ConversationText, on_conversation_text)
dg_connection.on(AgentWebSocketEvents.UserStartedSpeaking, on_user_started_speaking)
dg_connection.on(AgentWebSocketEvents.AgentThinking, on_agent_thinking)
dg_connection.on(AgentWebSocketEvents.AgentStartedSpeaking, on_agent_started_speaking)
dg_connection.on(AgentWebSocketEvents.AgentAudioDone, on_agent_audio_done)
dg_connection.on(AgentWebSocketEvents.AudioData, on_audio_data)
dg_connection.on(AgentWebSocketEvents.Error, on_error)

# Start connection
if dg_connection.start(agent_settings):
    print("Agent connection started")

    # Send audio data (typically from microphone)
    # audio_data = get_microphone_data()
    # dg_connection.send_audio(audio_data)

    # Keep connection alive
    # dg_connection.keep_alive()

    # Close when done (only a connection that actually started needs closing)
    dg_connection.close()

440

```

441

442

### Agent with Function Calling

443

444

```python

445

from deepgram import (

446

DeepgramClient, SettingsOptions, Agent, Think, Provider, Function,

447

Parameters, Properties, FunctionCallResponse, AgentWebSocketEvents

448

)

449

import json

450

451

client = DeepgramClient(api_key="your-api-key")

452

453

def on_function_call_request(self, request, **kwargs):
    """Handle function call requests from the agent.

    Runs the local function named by ``request.function_call.name`` with the
    supplied arguments and sends the JSON-encoded result back over
    ``dg_connection``.

    Args:
        request: Event exposing ``function_call.name`` and
            ``function_call.arguments`` (a dict-like object).
        **kwargs: Extra event data; unused.
    """
    call = request.function_call
    print(f"Function call: {call.name}")
    print(f"Arguments: {call.arguments}")

    # Execute the function based on name
    if call.name == "get_weather":
        location = call.arguments.get("location")
        payload = get_weather(location)  # Your weather function
    elif call.name == "set_reminder":
        reminder = call.arguments.get("reminder")
        remind_at = call.arguments.get("time")
        payload = {"success": set_reminder(reminder, remind_at)}  # Your reminder function
    else:
        # Previously an unknown name was dropped without any trace.
        print(f"Unhandled function call: {call.name}")
        return

    # Send response back to agent
    response = FunctionCallResponse(
        name=call.name,
        result=json.dumps(payload),
    )
    dg_connection.send_function_call_response(response)

480

481

def get_weather(location):
    """Mock weather function.

    Args:
        location: Value echoed back under the ``"location"`` key.

    Returns:
        dict: Fixed sample weather data for ``location``.
    """
    return {
        "location": location,
        "temperature": 72,
        "condition": "sunny",
        "humidity": 45,
    }

489

490

def set_reminder(reminder, time):
    """Mock reminder function.

    Args:
        reminder: Reminder text.
        time: When the reminder should fire; only echoed in the message.

    Returns:
        bool: Always True.
    """
    print(f"Setting reminder: {reminder} at {time}")
    return True

494

495

# Define available functions

496

# Schema the agent uses to decide when and how to call get_weather.
weather_function = Function(
    name="get_weather",
    description="Get current weather information for a location",
    parameters=Parameters(
        type="object",
        properties={
            "location": {"type": "string", "description": "City name or location"},
        },
        required=["location"],
    ),
)

507

508

# Schema the agent uses to decide when and how to call set_reminder.
reminder_function = Function(
    name="set_reminder",
    description="Set a reminder for the user",
    parameters=Parameters(
        type="object",
        properties={
            "reminder": {"type": "string", "description": "Reminder text"},
            "time": {"type": "string", "description": "Time for the reminder"},
        },
        required=["reminder", "time"],
    ),
)

520

521

# Configure agent with functions
agent_settings = SettingsOptions(
    agent=Agent(
        think=Think(
            provider=Provider(type="open_ai"),
            model="gpt-4",
            instructions="You are a helpful assistant with access to weather and reminder functions. Use them when appropriate.",
            functions=[weather_function, reminder_function],
        )
        # ... other agent configuration
    )
)

dg_connection = client.agent.websocket.v("1")
dg_connection.on(AgentWebSocketEvents.FunctionCallRequest, on_function_call_request)

if dg_connection.start(agent_settings):
    # Agent can now call functions during conversation
    pass

540

```

541

542

### Dynamic Agent Updates

543

544

```python

545

from deepgram import (

546

DeepgramClient, UpdatePromptOptions, UpdateSpeakOptions,

547

InjectAgentMessageOptions, InjectUserMessageOptions, Speak

548

)

549

550

client = DeepgramClient(api_key="your-api-key")
dg_connection = client.agent.websocket.v("1")

# Start with initial settings.
# NOTE: `initial_settings` is not defined in this snippet; build a
# SettingsOptions instance first (see the basic example).
if dg_connection.start(initial_settings):

    # Update the agent's personality/instructions
    new_prompt = UpdatePromptOptions(
        prompt="You are now a cheerful children's storyteller. Use simple language and be very enthusiastic."
    )
    dg_connection.update_prompt(new_prompt)

    # Change the voice model
    new_speak_options = UpdateSpeakOptions(
        speak=Speak(
            model="aura-luna-en",  # Different voice
            encoding="linear16",
            sample_rate=24000,
        )
    )
    dg_connection.update_speak_options(new_speak_options)

    # Inject context into the conversation
    agent_message = InjectAgentMessageOptions(
        text="I just switched to storytelling mode! What kind of story would you like to hear?"
    )
    dg_connection.inject_agent_message(agent_message)

    # Inject user context
    user_message = InjectUserMessageOptions(
        text="The user mentioned they like adventure stories about pirates."
    )
    dg_connection.inject_user_message(user_message)

583

```

584

585

### Multi-Agent Conversation

586

587

```python

588

from deepgram import DeepgramClient, SettingsOptions, Agent, Think, Provider

589

import asyncio

590

591

async def create_agent(client, agent_id, instructions):
    """Create, configure and start an agent connection.

    Args:
        client: Deepgram client used to open the async agent WebSocket.
        agent_id: Label prefixed to the agent's instructions.
        instructions: Role description for this agent.

    Returns:
        The started async agent WebSocket connection.

    Raises:
        RuntimeError: If ``start()`` reports that the connection did not start.
    """
    settings = SettingsOptions(
        agent=Agent(
            think=Think(
                provider=Provider(type="open_ai"),
                model="gpt-4",
                instructions=f"Agent {agent_id}: {instructions}",
            )
            # ... other configuration
        )
    )

    connection = client.agent.asyncwebsocket.v("1")
    # start() returns a bool; do not hand back a connection that never started.
    if not await connection.start(settings):
        raise RuntimeError(f"Agent {agent_id} failed to start")
    return connection

607

608

async def multi_agent_example():
    """Start three agents with different roles and always close them."""
    client = DeepgramClient(api_key="your-api-key")

    # Multiple agents with different roles
    roles = [
        (
            "Moderator",
            "You are a meeting moderator. Keep discussions on track and summarize key points.",
        ),
        (
            "Expert1",
            "You are a technical expert. Provide detailed technical insights.",
        ),
        (
            "Expert2",
            "You are a business expert. Focus on practical business implications.",
        ),
    ]

    agents = []
    try:
        for agent_id, instructions in roles:
            agents.append(await create_agent(client, agent_id, instructions))

        # Coordinate conversation between agents
        # This would involve managing turn-taking and message passing
        # between the different agent connections
    finally:
        # Close whatever was opened, even if a later agent failed to start.
        for agent in agents:
            await agent.close()

635

636

# Run multi-agent example

637

asyncio.run(multi_agent_example())

638

```

639

640

### Error Handling and Recovery

641

642

```python

643

from deepgram import DeepgramClient, DeepgramApiError, SettingsOptions, AgentWebSocketEvents

644

645

client = DeepgramClient(api_key="your-api-key")

646

647

def on_error(self, error, **kwargs):
    """Handle various error types.

    Prints the error, then a hint chosen by a case-insensitive substring
    match on its text ("connection" first, then "authentication").

    Args:
        error: Error object or message; only its ``str()`` form is inspected.
        **kwargs: Extra event data; unused.
    """
    print(f"Agent error: {error}")

    # Implement error-specific recovery logic
    message = str(error).lower()
    if "connection" in message:
        print("Connection error - attempting to reconnect...")
        # Implement reconnection logic
    elif "authentication" in message:
        print("Authentication error - check API key")
    else:
        print("Unknown error - logging for investigation")

659

660

def on_injection_refused(self, refusal, **kwargs):
    """Handle message injection refusals.

    Args:
        refusal: Event whose ``message`` attribute is printed.
        **kwargs: Extra event data; unused.
    """
    print(f"Message injection refused: {refusal.message}")
    # Implement fallback logic

664

665

# Sentinel so the cleanup below can tell whether a connection was created,
# without inspecting locals().
dg_connection = None

try:
    settings = SettingsOptions(
        # ... agent configuration
    )

    dg_connection = client.agent.websocket.v("1")
    dg_connection.on(AgentWebSocketEvents.Error, on_error)
    dg_connection.on(AgentWebSocketEvents.InjectionRefused, on_injection_refused)

    if dg_connection.start(settings):
        # Connection successful
        print("Agent started successfully")

        # Implement connection health monitoring
        # dg_connection.keep_alive()  # Send periodically

    else:
        print("Failed to start agent connection")

except DeepgramApiError as e:
    print(f"API Error: {e}")
except Exception as e:
    print(f"Unexpected error: {e}")
finally:
    if dg_connection is not None:
        dg_connection.close()

691

```