# Chat Completions

Chat completion API for conversational AI applications. It supports system, user, assistant, and tool messages; streaming responses; function calling; and comprehensive response metadata, including token usage and timing information.

## Capabilities

### Chat Completion Creation

Creates chat completions from a list of conversation messages, with support for a range of models and extensive configuration options.

```python { .api }
def create(
    self,
    *,
    messages: Iterable[completion_create_params.Message],
    model: str,
    frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
    logit_bias: Optional[object] | NotGiven = NOT_GIVEN,
    logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
    max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    min_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    min_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    n: Optional[int] | NotGiven = NOT_GIVEN,
    parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
    presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
    reasoning_effort: Optional[Literal["low", "medium", "high"]] | NotGiven = NOT_GIVEN,
    response_format: Optional[completion_create_params.ResponseFormat] | NotGiven = NOT_GIVEN,
    seed: Optional[int] | NotGiven = NOT_GIVEN,
    service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
    stop: Union[str, List[str], None] | NotGiven = NOT_GIVEN,
    stream: Optional[bool] | NotGiven = NOT_GIVEN,
    stream_options: Optional[completion_create_params.StreamOptions] | NotGiven = NOT_GIVEN,
    temperature: Optional[float] | NotGiven = NOT_GIVEN,
    tool_choice: Optional[completion_create_params.ToolChoice] | NotGiven = NOT_GIVEN,
    tools: Optional[Iterable[completion_create_params.Tool]] | NotGiven = NOT_GIVEN,
    top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
    top_p: Optional[float] | NotGiven = NOT_GIVEN,
    user: Optional[str] | NotGiven = NOT_GIVEN,
    cf_ray: str | NotGiven = NOT_GIVEN,
    x_amz_cf_id: str | NotGiven = NOT_GIVEN,
    x_delay_time: float | NotGiven = NOT_GIVEN,
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> ChatCompletion | Stream[ChatCompletionChunk]:
    """
    Create a chat completion.

    Parameters:
    - messages: List of conversation messages with roles (system, user, assistant, tool)
    - model: ID of the model to use (e.g., "llama3.1-70b")
    - frequency_penalty: Penalty for frequent token usage (-2.0 to 2.0)
    - logit_bias: Modify the likelihood of specific tokens appearing (JSON object)
    - logprobs: Return log probabilities of output tokens
    - max_completion_tokens: Maximum number of completion tokens to generate
    - max_tokens: Maximum number of tokens to generate (legacy parameter)
    - min_completion_tokens: Minimum number of completion tokens to generate
    - min_tokens: Minimum number of tokens to generate (legacy parameter)
    - n: Number of completion choices to generate
    - parallel_tool_calls: Enable parallel tool calling
    - presence_penalty: Penalty for token presence (-2.0 to 2.0)
    - reasoning_effort: Reasoning effort level ("low", "medium", "high")
    - response_format: Format of the response (text or json_object)
    - seed: Random seed for deterministic generation
    - service_tier: Service tier for request processing ("auto", "default")
    - stop: Sequences where generation should stop
    - stream: Enable streaming response (use stream=True for streaming)
    - stream_options: Additional streaming options
    - temperature: Sampling temperature (0.0 to 2.0)
    - tool_choice: Control tool calling behavior
    - tools: List of available tools/functions
    - top_logprobs: Number of top log probabilities to return per token
    - top_p: Nucleus sampling parameter
    - user: Unique identifier for the end user
    - cf_ray: Cloudflare Ray ID for request tracing
    - x_amz_cf_id: Amazon CloudFront ID for request tracing
    - x_delay_time: Additional delay time for request processing
    - extra_headers: Additional headers to include with the request
    - extra_query: Additional query parameters
    - extra_body: Additional request body data
    - timeout: Request timeout override

    Returns:
    ChatCompletion, or Stream[ChatCompletionChunk] when stream=True
    """
```
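
For instance, structured output can be requested through `response_format`. A minimal sketch, assuming the `ResponseFormat` parameter accepts the common `{"type": "json_object"}` shape and that the model honors a JSON instruction in the prompt:

```python
from cerebras.cloud.sdk import Cerebras

client = Cerebras()

# Assumption: response_format takes the {"type": "json_object"} convention.
response = client.chat.completions.create(
    model="llama3.1-70b",
    messages=[
        {"role": "system", "content": "Reply with a JSON object."},
        {"role": "user", "content": 'List three primary colors as {"colors": [...]}.'},
    ],
    response_format={"type": "json_object"},
)

print(response.choices[0].message.content)
```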

### Streaming Chat Completion

Creates streaming chat completions for real-time token generation and incremental delivery of the response.

```python { .api }
def create(
    self,
    *,
    messages: Iterable[completion_create_params.Message],
    model: str,
    stream: Literal[True],
    **kwargs,
) -> Stream[ChatCompletionChunk]:
    """
    Create a streaming chat completion.

    Parameters:
    - stream: Must be True for streaming responses
    - All other parameters are the same as for the non-streaming create()

    Returns:
    Stream object yielding ChatCompletionChunk objects
    """
```

### Resource Classes

Synchronous and asynchronous resource classes that provide the chat completion API methods.

```python { .api }
class ChatResource:
    """Synchronous chat resource."""
    completions: CompletionsResource

    @cached_property
    def with_raw_response(self) -> ChatResourceWithRawResponse: ...

    @cached_property
    def with_streaming_response(self) -> ChatResourceWithStreamingResponse: ...

class AsyncChatResource:
    """Asynchronous chat resource."""
    completions: AsyncCompletionsResource

    @cached_property
    def with_raw_response(self) -> AsyncChatResourceWithRawResponse: ...

    @cached_property
    def with_streaming_response(self) -> AsyncChatResourceWithStreamingResponse: ...

class CompletionsResource(SyncAPIResource):
    """Synchronous chat completions resource."""

class AsyncCompletionsResource(AsyncAPIResource):
    """Asynchronous chat completions resource."""
```
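
The `with_raw_response` wrapper makes HTTP-level details available alongside the parsed completion. A sketch under the assumption that the wrapper follows the usual generated-SDK pattern of exposing `headers` and a `parse()` method:

```python
from cerebras.cloud.sdk import Cerebras

client = Cerebras()

# Assumption: with_raw_response wraps create() so the raw HTTP response
# is returned; parse() is assumed to yield the ChatCompletion model.
raw = client.chat.completions.with_raw_response.create(
    model="llama3.1-70b",
    messages=[{"role": "user", "content": "Hello!"}],
)

print(raw.headers.get("content-type"))
completion = raw.parse()
print(completion.choices[0].message.content)
```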

## Message Types

### Message Structure

```python { .api }
class Message(TypedDict):
    """Base message structure for chat completions."""
    role: Literal["system", "user", "assistant", "tool"]
    content: str
    name: NotRequired[str]  # Optional name for the message author

class SystemMessage(Message):
    """System message for setting context and instructions."""
    role: Literal["system"]
    content: str

class UserMessage(Message):
    """User message containing the user's input."""
    role: Literal["user"]
    content: str

class AssistantMessage(Message):
    """Assistant message with the AI's response."""
    role: Literal["assistant"]
    content: str
    tool_calls: NotRequired[List[ToolCall]]  # Optional tool calls

class ToolMessage(Message):
    """Tool message containing tool execution results."""
    role: Literal["tool"]
    content: str
    tool_call_id: str  # ID of the tool call this responds to
```
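
In practice these shapes are passed as plain dicts, as in the usage examples below. A short sketch of a multi-turn history built from these roles:

```python
from cerebras.cloud.sdk import Cerebras

client = Cerebras()

# Multi-turn history: each entry matches one of the Message shapes above.
messages = [
    {"role": "system", "content": "You are a concise assistant."},
    {"role": "user", "content": "What is 2 + 2?"},
    {"role": "assistant", "content": "4."},
    {"role": "user", "content": "And doubled?"},
]

response = client.chat.completions.create(model="llama3.1-70b", messages=messages)
print(response.choices[0].message.content)
```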

### Tool Calling Types

```python { .api }
class Tool(TypedDict):
    """Tool/function definition for function calling."""
    type: Literal["function"]
    function: FunctionDefinition

class FunctionDefinition(TypedDict):
    """Function definition with name, description, and parameters."""
    name: str
    description: str
    parameters: Dict[str, Any]  # JSON Schema for parameters

class ToolCall(TypedDict):
    """Tool call made by the assistant."""
    id: str
    type: Literal["function"]
    function: FunctionCall

class FunctionCall(TypedDict):
    """Function call details."""
    name: str
    arguments: str  # JSON string of arguments

class ToolChoice(TypedDict):
    """Tool choice configuration."""
    type: Literal["function"]
    function: Dict[str, str]  # {"name": "function_name"}
```
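
As dicts, a tool definition and a forced tool choice look like the sketch below. The `get_time` function is illustrative, not part of the SDK:

```python
from cerebras.cloud.sdk import Cerebras

client = Cerebras()

# Illustrative tool matching the Tool/FunctionDefinition shapes above.
get_time_tool = {
    "type": "function",
    "function": {
        "name": "get_time",
        "description": "Get the current time for a timezone",
        "parameters": {
            "type": "object",
            "properties": {
                "tz": {"type": "string", "description": "IANA timezone name"}
            },
            "required": ["tz"],
        },
    },
}

# A ToolChoice dict forces a call to get_time instead of a free-form answer.
response = client.chat.completions.create(
    model="llama3.1-70b",
    messages=[{"role": "user", "content": "What time is it in Tokyo?"}],
    tools=[get_time_tool],
    tool_choice={"type": "function", "function": {"name": "get_time"}},
)
print(response.choices[0].message.tool_calls)
```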

## Response Types

### Chat Completion Response

```python { .api }
class ChatCompletion(BaseModel):
    """Complete chat completion response."""
    id: str
    choices: List[ChatCompletionChoice]
    created: int
    model: str
    object: Literal["chat.completion"]
    system_fingerprint: Optional[str]
    usage: Optional[ChatCompletionUsage]
    time_info: Optional[ChatCompletionTimeInfo]

class ChatCompletionChoice(BaseModel):
    """Individual completion choice."""
    finish_reason: Optional[Literal["stop", "length", "tool_calls", "content_filter"]]
    index: int
    logprobs: Optional[ChatCompletionLogprobs]
    message: ChatCompletionMessage

class ChatCompletionMessage(BaseModel):
    """Message in the completion response."""
    content: Optional[str]
    role: Literal["assistant"]
    tool_calls: Optional[List[ChatCompletionMessageToolCall]]

class ChatCompletionUsage(BaseModel):
    """Token usage information."""
    completion_tokens: int
    prompt_tokens: int
    total_tokens: int
    prompt_tokens_details: Optional[ChatCompletionUsagePromptTokensDetails]

class ChatCompletionTimeInfo(BaseModel):
    """Timing information for the completion."""
    queue_time: Optional[float]
    prompt_time: Optional[float]
    completion_time: Optional[float]
    total_time: Optional[float]
```
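
A short sketch of reading the metadata fields off a completed response. Since `usage` and `time_info` are Optional, the checks guard against None:

```python
from cerebras.cloud.sdk import Cerebras

client = Cerebras()

response = client.chat.completions.create(
    model="llama3.1-70b",
    messages=[{"role": "user", "content": "Hi"}],
)

if response.usage is not None:
    print(
        f"prompt={response.usage.prompt_tokens} "
        f"completion={response.usage.completion_tokens} "
        f"total={response.usage.total_tokens}"
    )

if response.time_info is not None and response.time_info.total_time is not None:
    print(f"total_time={response.time_info.total_time:.3f}s")
```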

### Streaming Response Types

```python { .api }
class ChatCompletionChunk(BaseModel):
    """Streaming chunk in chat completion."""
    id: str
    choices: List[ChatCompletionChunkChoice]
    created: int
    model: str
    object: Literal["chat.completion.chunk"]
    system_fingerprint: Optional[str]
    usage: Optional[ChatCompletionUsage]
    time_info: Optional[ChatCompletionTimeInfo]

class ChatCompletionChunkChoice(BaseModel):
    """Choice in streaming chunk."""
    delta: ChatCompletionChunkDelta
    finish_reason: Optional[Literal["stop", "length", "tool_calls", "content_filter"]]
    index: int
    logprobs: Optional[ChatCompletionLogprobs]

class ChatCompletionChunkDelta(BaseModel):
    """Delta information in streaming chunk."""
    content: Optional[str]
    role: Optional[Literal["assistant"]]
    tool_calls: Optional[List[ChatCompletionChunkDeltaToolCall]]
```
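
Each delta carries only a fragment, so clients typically concatenate `delta.content` across chunks to rebuild the full message. A minimal accumulation sketch (the empty-choices guard is a defensive assumption, since some chunks may carry only metadata):

```python
from cerebras.cloud.sdk import Cerebras

client = Cerebras()

stream = client.chat.completions.create(
    model="llama3.1-70b",
    messages=[{"role": "user", "content": "Name three planets."}],
    stream=True,
)

parts = []
for chunk in stream:
    # Skip chunks without choices or without new content.
    if chunk.choices and chunk.choices[0].delta.content:
        parts.append(chunk.choices[0].delta.content)

full_text = "".join(parts)
print(full_text)
```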

## Usage Examples

### Basic Chat Completion

```python
from cerebras.cloud.sdk import Cerebras

client = Cerebras()

response = client.chat.completions.create(
    model="llama3.1-70b",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What is machine learning?"}
    ],
    max_tokens=200,
    temperature=0.7
)

print(response.choices[0].message.content)
print(f"Used {response.usage.total_tokens} tokens")
```

### Streaming Chat Completion

```python
from cerebras.cloud.sdk import Cerebras

client = Cerebras()

stream = client.chat.completions.create(
    model="llama3.1-70b",
    messages=[
        {"role": "user", "content": "Tell me a short story"}
    ],
    stream=True,
    max_tokens=500
)

print("Story: ", end="")
for chunk in stream:
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="", flush=True)
print()
```

### Function Calling

```python
from cerebras.cloud.sdk import Cerebras
import json

client = Cerebras()

# Define a function
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get weather information for a location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state/country"
                    },
                    "unit": {
                        "type": "string",
                        "enum": ["celsius", "fahrenheit"],
                        "description": "Temperature unit"
                    }
                },
                "required": ["location"]
            }
        }
    }
]

response = client.chat.completions.create(
    model="llama3.1-70b",
    messages=[
        {"role": "user", "content": "What's the weather like in San Francisco?"}
    ],
    tools=tools,
    tool_choice="auto"
)

# Check if the model wants to call a function
message = response.choices[0].message
if message.tool_calls:
    tool_call = message.tool_calls[0]
    function_name = tool_call.function.name
    function_args = json.loads(tool_call.function.arguments)
    print(f"Model wants to call {function_name} with args: {function_args}")
```
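
Continuing the example, the tool's result is normally fed back as a `tool` message so the model can compose a final answer. A sketch under the assumption that the API follows the usual OpenAI-style round trip; the weather payload is fabricated locally for illustration:

```python
if message.tool_calls:
    tool_call = message.tool_calls[0]
    # Hypothetical local execution of get_weather.
    tool_result = {"temperature": 18, "unit": "celsius", "conditions": "foggy"}

    followup = client.chat.completions.create(
        model="llama3.1-70b",
        messages=[
            {"role": "user", "content": "What's the weather like in San Francisco?"},
            {
                # Echo the assistant turn that requested the tool call.
                "role": "assistant",
                "content": message.content or "",
                "tool_calls": [
                    {
                        "id": tool_call.id,
                        "type": "function",
                        "function": {
                            "name": tool_call.function.name,
                            "arguments": tool_call.function.arguments,
                        },
                    }
                ],
            },
            # Tool result is linked back via tool_call_id.
            {"role": "tool", "content": json.dumps(tool_result), "tool_call_id": tool_call.id},
        ],
        tools=tools,
    )
    print(followup.choices[0].message.content)
```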

### Async Chat Completion

```python
import asyncio
from cerebras.cloud.sdk import AsyncCerebras

async def chat_example():
    client = AsyncCerebras()

    response = await client.chat.completions.create(
        model="llama3.1-70b",
        messages=[
            {"role": "user", "content": "Explain quantum computing"}
        ],
        max_tokens=300
    )

    print(response.choices[0].message.content)

    await client.close()

asyncio.run(chat_example())
```
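
Alternatively, the client can be used as an async context manager so it is closed automatically, assuming the SDK supports the context-manager protocol as similar generated clients do:

```python
import asyncio
from cerebras.cloud.sdk import AsyncCerebras

async def chat_example():
    # Assumption: AsyncCerebras implements __aenter__/__aexit__.
    async with AsyncCerebras() as client:
        response = await client.chat.completions.create(
            model="llama3.1-70b",
            messages=[{"role": "user", "content": "Explain quantum computing"}],
            max_tokens=300,
        )
    print(response.choices[0].message.content)

asyncio.run(chat_example())
```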

### Multiple Completions

```python
from cerebras.cloud.sdk import Cerebras

client = Cerebras()

response = client.chat.completions.create(
    model="llama3.1-70b",
    messages=[
        {"role": "user", "content": "Write a creative opening line for a story"}
    ],
    n=3,  # Generate 3 different completions
    max_tokens=50,
    temperature=0.9
)

for i, choice in enumerate(response.choices):
    print(f"Option {i+1}: {choice.message.content}")
```