# Core Completion API

Fundamental completion functionality that forms the foundation of LiteLLM's unified interface. These functions provide chat completion, text completion, and streaming support across 100+ LLM providers with OpenAI-compatible parameters.

## Capabilities

### Chat Completion

Primary function for conversational AI interactions using the messages format. Supports all OpenAI parameters and provider-specific extensions.

```python { .api }
def completion(
    model: str,
    messages: List[Dict[str, Any]],
    # Standard OpenAI parameters
    temperature: Optional[float] = None,
    top_p: Optional[float] = None,
    n: Optional[int] = None,
    stream: Optional[bool] = None,
    stop: Optional[Union[str, List[str]]] = None,
    max_tokens: Optional[int] = None,
    presence_penalty: Optional[float] = None,
    frequency_penalty: Optional[float] = None,
    logit_bias: Optional[Dict[str, float]] = None,
    user: Optional[str] = None,
    response_format: Optional[Dict[str, Any]] = None,
    seed: Optional[int] = None,
    # Function calling
    tools: Optional[List[Dict[str, Any]]] = None,
    tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
    functions: Optional[List[Dict[str, Any]]] = None,
    function_call: Optional[Union[str, Dict[str, Any]]] = None,
    # LiteLLM specific parameters
    timeout: Optional[float] = None,
    api_key: Optional[str] = None,
    api_base: Optional[str] = None,
    api_version: Optional[str] = None,
    custom_llm_provider: Optional[str] = None,
    # Caching
    cache: Optional[Dict[str, Any]] = None,
    # Provider-specific overrides
    **kwargs
) -> Union[ModelResponse, Iterator[ModelResponseStream]]
    """
    Create a chat completion using any supported LLM provider.

    Args:
        model (str): Model identifier (e.g., "gpt-4", "claude-3-sonnet-20240229")
        messages (List[Dict[str, Any]]): Conversation messages in OpenAI format
        temperature (Optional[float]): Sampling temperature (0.0 to 2.0)
        max_tokens (Optional[int]): Maximum tokens to generate
        stream (Optional[bool]): Enable streaming response
        tools (Optional[List[Dict[str, Any]]]): Available function tools
        tool_choice (Optional[Union[str, Dict[str, Any]]]): Tool selection strategy
        timeout (Optional[float]): Request timeout in seconds
        api_key (Optional[str]): Provider API key override
        custom_llm_provider (Optional[str]): Force specific provider

    Returns:
        Union[ModelResponse, Iterator[ModelResponseStream]]: Completion response or stream

    Raises:
        AuthenticationError: Invalid API key or authentication failure
        RateLimitError: Rate limit exceeded
        ContextWindowExceededError: Input exceeds model's context window
        InvalidRequestError: Invalid parameters or model not found
    """
```

### Async Chat Completion

Asynchronous version of the completion function for concurrent processing and improved performance.

```python { .api }
async def acompletion(
    model: str,
    messages: List[Dict[str, Any]],
    # All same parameters as completion()
    **kwargs
) -> Union[ModelResponse, AsyncIterator[ModelResponseStream]]
    """
    Async version of completion() for concurrent LLM requests.

    Args:
        Same as completion() function

    Returns:
        Union[ModelResponse, AsyncIterator[ModelResponseStream]]: Async completion response or stream
    """
```

### Text Completion

Legacy text completion interface for prompt-based models and compatibility with older model types.

```python { .api }
def text_completion(
    model: str,
    prompt: str,
    # Standard parameters
    temperature: Optional[float] = None,
    max_tokens: Optional[int] = None,
    top_p: Optional[float] = None,
    frequency_penalty: Optional[float] = None,
    presence_penalty: Optional[float] = None,
    stop: Optional[Union[str, List[str]]] = None,
    stream: Optional[bool] = None,
    n: Optional[int] = None,
    logit_bias: Optional[Dict[str, float]] = None,
    # LiteLLM specific
    timeout: Optional[float] = None,
    api_key: Optional[str] = None,
    api_base: Optional[str] = None,
    custom_llm_provider: Optional[str] = None,
    **kwargs
) -> Union[TextCompletionResponse, Iterator[TextCompletionResponse]]
    """
    Create a text completion using prompt-based models.

    Args:
        model (str): Model identifier
        prompt (str): Input text prompt
        temperature (Optional[float]): Sampling temperature
        max_tokens (Optional[int]): Maximum tokens to generate
        stream (Optional[bool]): Enable streaming response
        stop (Optional[Union[str, List[str]]]): Stop sequences
        timeout (Optional[float]): Request timeout in seconds

    Returns:
        Union[TextCompletionResponse, Iterator[TextCompletionResponse]]: Text completion response
    """
```

### Async Text Completion

Asynchronous version of text completion for concurrent processing.

```python { .api }
async def atext_completion(
    model: str,
    prompt: str,
    **kwargs
) -> Union[TextCompletionResponse, AsyncIterator[TextCompletionResponse]]
    """
    Async version of text_completion() for concurrent requests.

    Args:
        Same as text_completion() function

    Returns:
        Union[TextCompletionResponse, AsyncIterator[TextCompletionResponse]]: Async text completion response
    """
```

## Message Format

```python { .api }
class Message:
    """OpenAI-compatible message format"""
    role: Literal["system", "user", "assistant", "tool"]
    content: Optional[Union[str, List[Dict[str, Any]]]]
    name: Optional[str] = None
    tool_calls: Optional[List[ChatCompletionMessageToolCall]] = None
    tool_call_id: Optional[str] = None

class ChatCompletionMessageToolCall:
    id: str
    type: Literal["function"]
    function: Function

class Function:
    name: str
    arguments: str
```

## Response Objects

```python { .api }
class ModelResponse(BaseLiteLLMOpenAIResponseObject):
    """Main completion response object"""
    id: str
    choices: List[Choices]
    created: int
    model: Optional[str] = None
    object: str = "chat.completion"
    system_fingerprint: Optional[str] = None
    usage: Optional[Usage] = None
    _hidden_params: HiddenParams = {}
    _response_ms: Optional[float] = None

class ModelResponseStream(BaseLiteLLMOpenAIResponseObject):
    """Streaming completion response chunk"""
    id: str
    choices: List[StreamingChoices]
    created: int
    model: Optional[str] = None
    object: str = "chat.completion.chunk"

class Choices:
    finish_reason: Optional[Literal["stop", "length", "function_call", "tool_calls", "content_filter"]] = None
    index: int = 0
    message: Optional[Message] = None
    logprobs: Optional[ChoiceLogprobs] = None

class StreamingChoices:
    finish_reason: Optional[str] = None
    index: int = 0
    delta: Optional[Delta] = None
    logprobs: Optional[ChoiceLogprobs] = None

class Delta:
    content: Optional[str] = None
    role: Optional[str] = None
    tool_calls: Optional[List[ChatCompletionMessageToolCall]] = None

class Usage:
    prompt_tokens: int
    completion_tokens: Optional[int] = None
    total_tokens: int
    cache_creation_input_tokens: Optional[int] = None
    cache_read_input_tokens: Optional[int] = None

class TextCompletionResponse(BaseLiteLLMOpenAIResponseObject):
    """Text completion response object"""
    id: str
    choices: List[TextChoices]
    created: int
    model: Optional[str] = None
    object: str = "text_completion"
    usage: Optional[Usage] = None

class TextChoices:
    finish_reason: Optional[str] = None
    index: int = 0
    logprobs: Optional[TextChoicesLogprobs] = None
    text: str
```

## Usage Examples

### Basic Chat Completion

```python
import litellm

# Simple completion
response = litellm.completion(
    model="gpt-4",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What is machine learning?"}
    ]
)

print(response.choices[0].message.content)
```

### Streaming Completion

```python
response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Write a story about AI"}],
    stream=True,
    max_tokens=500
)

for chunk in response:
    if chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="")
```

### Function Calling

```python
tools = [{
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Get current weather for a location",
        "parameters": {
            "type": "object",
            "properties": {
                "location": {"type": "string", "description": "City name"}
            },
            "required": ["location"]
        }
    }
}]

response = litellm.completion(
    model="gpt-4",
    messages=[{"role": "user", "content": "What's the weather in Paris?"}],
    tools=tools,
    tool_choice="auto"
)

if response.choices[0].message.tool_calls:
    tool_call = response.choices[0].message.tool_calls[0]
    print(f"Function: {tool_call.function.name}")
    print(f"Arguments: {tool_call.function.arguments}")
```

### Multi-modal Completion

```python
# Vision model with image
messages = [{
    "role": "user",
    "content": [
        {"type": "text", "text": "What's in this image?"},
        {
            "type": "image_url",
            "image_url": {"url": "https://example.com/image.jpg"}
        }
    ]
}]

response = litellm.completion(
    model="gpt-4-vision-preview",
    messages=messages
)
```

### Async Completion with Multiple Models

```python
import asyncio

async def test_multiple_models():
    tasks = [
        litellm.acompletion(
            model="gpt-4",
            messages=[{"role": "user", "content": "Hello from GPT-4"}]
        ),
        litellm.acompletion(
            model="claude-3-sonnet-20240229",
            messages=[{"role": "user", "content": "Hello from Claude"}]
        )
    ]

    responses = await asyncio.gather(*tasks)
    for i, response in enumerate(responses):
        print(f"Response {i}: {response.choices[0].message.content}")

asyncio.run(test_multiple_models())
```

### Provider-specific Parameters

```python
# Anthropic Claude with specific parameters
response = litellm.completion(
    model="claude-3-sonnet-20240229",
    messages=[{"role": "user", "content": "Explain quantum physics"}],
    max_tokens=1000,
    temperature=0.7,
    # Anthropic-specific
    top_k=40,
    custom_llm_provider="anthropic"
)

# Cohere with custom parameters
response = litellm.completion(
    model="command-nightly",
    messages=[{"role": "user", "content": "Write a summary"}],
    # Cohere-specific
    p=0.75,
    k=0,
    custom_llm_provider="cohere"
)
```

### Error Handling

```python
try:
    response = litellm.completion(
        model="gpt-4",
        messages=[{"role": "user", "content": "Hello"}],
        timeout=30
    )
except litellm.RateLimitError as e:
    print(f"Rate limit exceeded: {e}")
except litellm.AuthenticationError as e:
    print(f"Authentication failed: {e}")
except litellm.ContextWindowExceededError as e:
    print(f"Context window exceeded: {e}")
except Exception as e:
    print(f"Unexpected error: {e}")
```