Tessl Tile for pypi/portkey-ai@1.14.0

or run

npx @tessl/cli init

beta-realtime.mddocs/

0
# Beta Realtime API
1

2
The Beta Realtime API provides real-time audio and WebSocket-based AI interactions for building conversational applications with low-latency voice communication. It supports real-time session management and WebSocket connections for streaming audio.
3

4
## Capabilities
5

6
### Real-Time Connection Management
7

8
Establishes WebSocket connections for real-time communication with AI models, enabling low-latency voice and audio interactions.
9

10
```python { .api }
11
class BetaRealtime:
12
    def connect(
13
        self,
14
        *,
15
        model: str,
16
        websocket_connection_options: WebsocketConnectionOptions = {},
17
        **kwargs
18
    ) -> RealtimeConnectionManager:
19
        """
20
        Create a real-time WebSocket connection to an AI model.
21

22
        Args:
23
            model: Model identifier for real-time communication
24
            websocket_connection_options: WebSocket configuration options
25
            **kwargs: Additional connection parameters
26

27
        Returns:
28
            RealtimeConnectionManager: Connection manager for real-time communication
29
        """
30

31
    sessions: BetaSessions
32

33
class AsyncBetaRealtime:
34
    def connect(
35
        self,
36
        *,
37
        model: str,
38
        websocket_connection_options: WebsocketConnectionOptions = {},
39
        **kwargs
40
    ) -> AsyncRealtimeConnectionManager:
41
        """Async version of connect method."""
42

43
    sessions: AsyncBetaSessions
44
```
45

46
### Real-Time Session Management
47

48
Create and manage real-time sessions with configurable audio formats, voice settings, and interaction parameters.
49

50
```python { .api }
51
class BetaSessions:
52
    def create(
53
        self,
54
        *,
55
        model: Any = "portkey-default",
56
        input_audio_format: Union[Any, NotGiven] = NOT_GIVEN,
57
        input_audio_transcription: Union[Any, NotGiven] = NOT_GIVEN,
58
        instructions: Union[str, NotGiven] = NOT_GIVEN,
59
        max_response_output_tokens: Union[int, Any, NotGiven] = NOT_GIVEN,
60
        modalities: Union[List[Any], NotGiven] = NOT_GIVEN,
61
        output_audio_format: Union[Any, NotGiven] = NOT_GIVEN,
62
        temperature: Union[float, NotGiven] = NOT_GIVEN,
63
        tool_choice: Union[str, NotGiven] = NOT_GIVEN,
64
        tools: Union[Iterable[Any], NotGiven] = NOT_GIVEN,
65
        turn_detection: Union[Any, NotGiven] = NOT_GIVEN,
66
        voice: Union[Any, NotGiven] = NOT_GIVEN
67
    ) -> SessionCreateResponse:
68
        """
69
        Create a real-time session for voice communication.
70

71
        Args:
72
            model: Model to use for the session
73
            input_audio_format: Format for input audio (e.g., "pcm16", "g711_ulaw")
74
            input_audio_transcription: Configuration for input audio transcription
75
            instructions: System instructions for the AI assistant
76
            max_response_output_tokens: Maximum tokens in response
77
            modalities: Supported modalities (audio, text)
78
            output_audio_format: Format for output audio
79
            temperature: Response randomness (0.0 to 2.0)
80
            tool_choice: Tool selection strategy
81
            tools: Available tools for the assistant
82
            turn_detection: Turn detection configuration
83
            voice: Voice model for audio output
84

85
        Returns:
86
            SessionCreateResponse: Session configuration and connection details
87
        """
88

89
class AsyncBetaSessions:
90
    async def create(
91
        self,
92
        *,
93
        model: Any = "portkey-default",
94
        input_audio_format: Union[Any, NotGiven] = NOT_GIVEN,
95
        input_audio_transcription: Union[Any, NotGiven] = NOT_GIVEN,
96
        instructions: Union[str, NotGiven] = NOT_GIVEN,
97
        max_response_output_tokens: Union[int, Any, NotGiven] = NOT_GIVEN,
98
        modalities: Union[List[Any], NotGiven] = NOT_GIVEN,
99
        output_audio_format: Union[Any, NotGiven] = NOT_GIVEN,
100
        temperature: Union[float, NotGiven] = NOT_GIVEN,
101
        tool_choice: Union[str, NotGiven] = NOT_GIVEN,
102
        tools: Union[Iterable[Any], NotGiven] = NOT_GIVEN,
103
        turn_detection: Union[Any, NotGiven] = NOT_GIVEN,
104
        voice: Union[Any, NotGiven] = NOT_GIVEN
105
    ) -> SessionCreateResponse:
106
        """Async version of session creation."""
107
```
108

109
### Usage Examples
110

111
```python
112
from portkey_ai import Portkey
113

114
# Initialize client
115
portkey = Portkey(
116
    api_key="PORTKEY_API_KEY",
117
    virtual_key="VIRTUAL_KEY"
118
)
119

120
# Create a real-time session
121
session = portkey.beta.realtime.sessions.create(
122
    model="gpt-4o-realtime-preview",
123
    modalities=["text", "audio"],
124
    instructions="You are a helpful voice assistant.",
125
    voice="alloy",
126
    input_audio_format="pcm16",
127
    output_audio_format="pcm16",
128
    turn_detection={
129
        "type": "server_vad",
130
        "threshold": 0.5,
131
        "prefix_padding_ms": 300,
132
        "silence_duration_ms": 200
133
    }
134
)
135

136
print(f"Session ID: {session.id}")
137
print(f"Model: {session.model}")
138

139
# Establish WebSocket connection
140
connection = portkey.beta.realtime.connect(
141
    model="gpt-4o-realtime-preview",
142
    websocket_connection_options={
143
        "timeout": 30,
144
        "additional_headers": {
145
            "Authorization": f"Bearer {portkey.api_key}"
146
        }
147
    }
148
)
149

150
# Use connection for real-time communication
151
# Note: Actual usage would involve WebSocket event handling
152
with connection as conn:
153
    # Send audio data
154
    conn.send_audio_data(audio_bytes)
155
    
156
    # Handle responses
157
    for event in conn.listen():
158
        if event.type == "response.audio.delta":
159
            # Process audio response
160
            process_audio_chunk(event.delta)
161
        elif event.type == "response.text.delta":
162
            # Process text response
163
            print(event.delta, end="")
164
```
165

166
### Async Usage
167

168
```python
169
import asyncio
170
from portkey_ai import AsyncPortkey
171

172
async def create_realtime_session():
173
    portkey = AsyncPortkey(
174
        api_key="PORTKEY_API_KEY",
175
        virtual_key="VIRTUAL_KEY"
176
    )
177
    
178
    # Create session asynchronously
179
    session = await portkey.beta.realtime.sessions.create(
180
        model="gpt-4o-realtime-preview",
181
        modalities=["text", "audio"],
182
        instructions="You are a voice assistant for customer support.",
183
        voice="nova",
184
        temperature=0.7,
185
        max_response_output_tokens=150
186
    )
187
    
188
    # Establish async connection
189
    connection = portkey.beta.realtime.connect(
190
        model="gpt-4o-realtime-preview"
191
    )
192
    
193
    return session, connection
194

195
# Run async function
196
session, connection = asyncio.run(create_realtime_session())
197
```
198

199
### Advanced Configuration
200

201
```python
202
# Configure detailed session parameters
203
session = portkey.beta.realtime.sessions.create(
204
    model="gpt-4o-realtime-preview",
205
    modalities=["text", "audio"],
206
    instructions="""
207
    You are an AI assistant for a language learning app.
208
    Help users practice pronunciation and provide feedback.
209
    Speak clearly and at a moderate pace.
210
    """,
211
    voice="shimmer",
212
    input_audio_format="pcm16",
213
    output_audio_format="pcm16",
214
    input_audio_transcription={
215
        "model": "whisper-1"
216
    },
217
    turn_detection={
218
        "type": "server_vad",
219
        "threshold": 0.6,
220
        "prefix_padding_ms": 300,
221
        "silence_duration_ms": 500
222
    },
223
    tools=[
224
        {
225
            "type": "function",
226
            "name": "pronunciation_feedback",
227
            "description": "Provide pronunciation feedback",
228
            "parameters": {
229
                "type": "object",
230
                "properties": {
231
                    "word": {"type": "string"},
232
                    "accuracy": {"type": "number"},
233
                    "feedback": {"type": "string"}
234
                }
235
            }
236
        }
237
    ],
238
    tool_choice="auto",
239
    temperature=0.3,
240
    max_response_output_tokens=100
241
)
242
```
243

244
## Types
245

246
```python { .api }
247
class SessionCreateResponse:
248
    """Response from real-time session creation"""
249
    id: str  # Session identifier
250
    object: str  # "realtime.session"
251
    model: str  # Model used for the session
252
    modalities: List[str]  # Supported modalities
253
    instructions: str  # System instructions
254
    voice: str  # Voice model
255
    input_audio_format: str  # Input audio format
256
    output_audio_format: str  # Output audio format
257
    input_audio_transcription: dict  # Transcription settings
258
    turn_detection: dict  # Turn detection configuration
259
    tools: List[dict]  # Available tools
260
    tool_choice: str  # Tool selection strategy
261
    temperature: float  # Response temperature
262
    max_response_output_tokens: int  # Token limit
263
    _headers: Optional[dict]  # Response headers
264

265
class RealtimeConnectionManager:
266
    """Synchronous WebSocket connection manager"""
267
    def send_audio_data(self, audio_bytes: bytes) -> None: ...
268
    def listen(self) -> Iterator[RealtimeEvent]: ...
269
    def close(self) -> None: ...
270

271
class AsyncRealtimeConnectionManager:
272
    """Asynchronous WebSocket connection manager"""
273
    async def send_audio_data(self, audio_bytes: bytes) -> None: ...
274
    async def listen(self) -> AsyncIterator[RealtimeEvent]: ...
275
    async def close(self) -> None: ...
276

277
class WebsocketConnectionOptions:
278
    """WebSocket connection configuration"""
279
    timeout: Optional[int]  # Connection timeout in seconds
280
    additional_headers: Optional[dict]  # Additional headers
281
    # Additional WebSocket-specific options
282

283
class RealtimeEvent:
284
    """Real-time event from WebSocket connection"""
285
    type: str  # Event type
286
    delta: Optional[str]  # Content delta for streaming
287
    audio: Optional[bytes]  # Audio data
288
    # Additional event-specific fields
289
```

Version

Tile

Files

beta-realtime.mddocs/

Version

Tile

Files

beta-realtime.mddocs/

beta-realtime.mddocs/