or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

administration.md assistants-threads.md batch-processing.md beta-realtime.md chat-completions.md configuration-management.md container-content.md core-client.md embeddings.md evaluation-testing.md feedback-collections.md file-management.md fine-tuning.md framework-integrations.md index.md key-management.md models.md multimodal-apis.md observability-analytics.md prompt-management.md provider-integration.md text-completions.md uploads.md vector-stores.md

docs/beta-realtime.md

0

# Beta Realtime API

1

2

The Beta Realtime API provides real-time audio and WebSocket-based AI interactions for building conversational applications with low-latency voice communication. It supports real-time session management and WebSocket connections for streaming audio.

3

4

## Capabilities

5

6

### Real-Time Connection Management

7

8

Establishes WebSocket connections for real-time communication with AI models, enabling low-latency voice and audio interactions.

9

10

```python { .api }

11

class BetaRealtime:

12

def connect(

13

self,

14

*,

15

model: str,

16

websocket_connection_options: WebsocketConnectionOptions = {},

17

**kwargs

18

) -> RealtimeConnectionManager:

19

"""

20

Create a real-time WebSocket connection to an AI model.

21

22

Args:

23

model: Model identifier for real-time communication

24

websocket_connection_options: WebSocket configuration options

25

**kwargs: Additional connection parameters

26

27

Returns:

28

RealtimeConnectionManager: Connection manager for real-time communication

29

"""

30

31

sessions: BetaSessions

32

33

class AsyncBetaRealtime:

34

def connect(

35

self,

36

*,

37

model: str,

38

websocket_connection_options: WebsocketConnectionOptions = {},

39

**kwargs

40

) -> AsyncRealtimeConnectionManager:

41

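"""Async counterpart of BetaRealtime.connect. This is a regular (non-async) method: it returns an AsyncRealtimeConnectionManager rather than a coroutine, so the call itself is not awaited."""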

42

43

sessions: AsyncBetaSessions

44

```

45

46

### Real-Time Session Management

47

48

Create and manage real-time sessions with configurable audio formats, voice settings, and interaction parameters.

49

50

```python { .api }

51

class BetaSessions:

52

def create(

53

self,

54

*,

55

model: Any = "portkey-default",

56

input_audio_format: Union[Any, NotGiven] = NOT_GIVEN,

57

input_audio_transcription: Union[Any, NotGiven] = NOT_GIVEN,

58

instructions: Union[str, NotGiven] = NOT_GIVEN,

59

max_response_output_tokens: Union[int, Any, NotGiven] = NOT_GIVEN,

60

modalities: Union[List[Any], NotGiven] = NOT_GIVEN,

61

output_audio_format: Union[Any, NotGiven] = NOT_GIVEN,

62

temperature: Union[float, NotGiven] = NOT_GIVEN,

63

tool_choice: Union[str, NotGiven] = NOT_GIVEN,

64

tools: Union[Iterable[Any], NotGiven] = NOT_GIVEN,

65

turn_detection: Union[Any, NotGiven] = NOT_GIVEN,

66

voice: Union[Any, NotGiven] = NOT_GIVEN

67

) -> SessionCreateResponse:

68

"""

69

Create a real-time session for voice communication.

70

71

Args:

72

model: Model to use for the session

73

input_audio_format: Format for input audio (e.g., "pcm16", "g711_ulaw")

74

input_audio_transcription: Configuration for input audio transcription

75

instructions: System instructions for the AI assistant

76

max_response_output_tokens: Maximum tokens in response

77

modalities: Supported modalities (audio, text)

78

output_audio_format: Format for output audio

79

temperature: Response randomness (the OpenAI Realtime API limits this to 0.6 to 1.2; default 0.8)

80

tool_choice: Tool selection strategy

81

tools: Available tools for the assistant

82

turn_detection: Turn detection configuration

83

voice: Voice model for audio output

84

85

Returns:

86

SessionCreateResponse: Session configuration and connection details

87

"""

88

89

class AsyncBetaSessions:

90

async def create(

91

self,

92

*,

93

model: Any = "portkey-default",

94

input_audio_format: Union[Any, NotGiven] = NOT_GIVEN,

95

input_audio_transcription: Union[Any, NotGiven] = NOT_GIVEN,

96

instructions: Union[str, NotGiven] = NOT_GIVEN,

97

max_response_output_tokens: Union[int, Any, NotGiven] = NOT_GIVEN,

98

modalities: Union[List[Any], NotGiven] = NOT_GIVEN,

99

output_audio_format: Union[Any, NotGiven] = NOT_GIVEN,

100

temperature: Union[float, NotGiven] = NOT_GIVEN,

101

tool_choice: Union[str, NotGiven] = NOT_GIVEN,

102

tools: Union[Iterable[Any], NotGiven] = NOT_GIVEN,

103

turn_detection: Union[Any, NotGiven] = NOT_GIVEN,

104

voice: Union[Any, NotGiven] = NOT_GIVEN

105

) -> SessionCreateResponse:

106

"""Async version of BetaSessions.create; accepts the same keyword arguments and must be awaited."""

107

```

108

109

### Usage Examples

110

111

```python

112

from portkey_ai import Portkey

113

114

# Initialize client

115

portkey = Portkey(

116

api_key="PORTKEY_API_KEY",

117

virtual_key="VIRTUAL_KEY"

118

)

119

120

# Create a real-time session

121

session = portkey.beta.realtime.sessions.create(

122

model="gpt-4-realtime-preview",

123

modalities=["text", "audio"],

124

instructions="You are a helpful voice assistant.",

125

voice="alloy",

126

input_audio_format="pcm16",

127

output_audio_format="pcm16",

128

turn_detection={

129

"type": "server_vad",

130

"threshold": 0.5,

131

"prefix_padding_ms": 300,

132

"silence_duration_ms": 200

133

}

134

)

135

136

print(f"Session ID: {session.id}")

137

print(f"Model: {session.model}")

138

139

# Establish WebSocket connection

140

connection = portkey.beta.realtime.connect(

141

model="gpt-4-realtime-preview",

142

websocket_connection_options={

143

"timeout": 30,

144

"additional_headers": {

145

"Authorization": f"Bearer {portkey.api_key}"

146

}

147

}

148

)

149

150

# Use connection for real-time communication

151

# Note: Actual usage would involve WebSocket event handling

152

with connection as conn:

153

# Send audio data

154

conn.send_audio_data(audio_bytes)

155

156

# Handle responses

157

for event in conn.listen():

158

if event.type == "response.audio.delta":

159

# Process audio response

160

process_audio_chunk(event.delta)

161

elif event.type == "response.text.delta":

162

# Process text response

163

print(event.delta, end="")

164

```

165

166

### Async Usage

167

168

```python

169

import asyncio

170

from portkey_ai import AsyncPortkey

171

172

async def create_realtime_session():

173

portkey = AsyncPortkey(

174

api_key="PORTKEY_API_KEY",

175

virtual_key="VIRTUAL_KEY"

176

)

177

178

# Create session asynchronously

179

session = await portkey.beta.realtime.sessions.create(

180

model="gpt-4-realtime-preview",

181

modalities=["text", "audio"],

182

instructions="You are a voice assistant for customer support.",

183

voice="nova",

184

temperature=0.7,

185

max_response_output_tokens=150

186

)

187

188

# Establish async connection

189

connection = portkey.beta.realtime.connect(

190

model="gpt-4-realtime-preview"

191

)

192

193

return session, connection

194

195

# Run async function

196

session, connection = asyncio.run(create_realtime_session())

197

```

198

199

### Advanced Configuration

200

201

```python

202

# Configure detailed session parameters

203

session = portkey.beta.realtime.sessions.create(

204

model="gpt-4-realtime-preview",

205

modalities=["text", "audio"],

206

instructions="""

207

You are an AI assistant for a language learning app.

208

Help users practice pronunciation and provide feedback.

209

Speak clearly and at a moderate pace.

210

""",

211

voice="shimmer",

212

input_audio_format="pcm16",

213

output_audio_format="pcm16",

214

input_audio_transcription={

215

"model": "whisper-1"

216

},

217

turn_detection={

218

"type": "server_vad",

219

"threshold": 0.6,

220

"prefix_padding_ms": 300,

221

"silence_duration_ms": 500

222

},

223

tools=[

224

{

225

"type": "function",

226

"name": "pronunciation_feedback",

227

"description": "Provide pronunciation feedback",

228

"parameters": {

229

"type": "object",

230

"properties": {

231

"word": {"type": "string"},

232

"accuracy": {"type": "number"},

233

"feedback": {"type": "string"}

234

}

235

}

236

}

237

],

238

tool_choice="auto",

239

temperature=0.3,

240

max_response_output_tokens=100

241

)

242

```

243

244

## Types

245

246

```python { .api }

247

class SessionCreateResponse:

248

"""Response from real-time session creation"""

249

id: str # Session identifier

250

object: str # "realtime.session"

251

model: str # Model used for the session

252

modalities: List[str] # Supported modalities

253

instructions: str # System instructions

254

voice: str # Voice model

255

input_audio_format: str # Input audio format

256

output_audio_format: str # Output audio format

257

input_audio_transcription: dict # Transcription settings

258

turn_detection: dict # Turn detection configuration

259

tools: List[dict] # Available tools

260

tool_choice: str # Tool selection strategy

261

temperature: float # Response temperature

262

max_response_output_tokens: int # Token limit

263

_headers: Optional[dict] # Response headers

264

265

class RealtimeConnectionManager:

266

"""Synchronous WebSocket connection manager"""

267

def send_audio_data(self, audio_bytes: bytes) -> None: ...

268

def listen(self) -> Iterator[RealtimeEvent]: ...

269

def close(self) -> None: ...

270

271

class AsyncRealtimeConnectionManager:

272

"""Asynchronous WebSocket connection manager"""

273

async def send_audio_data(self, audio_bytes: bytes) -> None: ...

274

async def listen(self) -> AsyncIterator[RealtimeEvent]: ...

275

async def close(self) -> None: ...

276

277

class WebsocketConnectionOptions:

278

"""WebSocket connection configuration"""

279

timeout: Optional[int] # Connection timeout in seconds

280

additional_headers: Optional[dict] # Additional headers

281

# Additional WebSocket-specific options

282

283

class RealtimeEvent:

284

"""Real-time event from WebSocket connection"""

285

type: str # Event type

286

delta: Optional[str] # Content delta for streaming

287

audio: Optional[bytes] # Audio data

288

# Additional event-specific fields

289

```