# Core Completion API

Fundamental completion functionality that forms the foundation of LiteLLM's unified interface. These functions provide chat completion, text completion, and streaming support across 100+ LLM providers with OpenAI-compatible parameters.

## Capabilities

### Chat Completion

Primary function for conversational AI interactions using the messages format. Supports all OpenAI parameters and provider-specific extensions.

```python { .api }
def completion(
    model: str,
    messages: List[Dict[str, Any]],
    # Standard OpenAI parameters
    temperature: Optional[float] = None,
    top_p: Optional[float] = None,
    n: Optional[int] = None,
    stream: Optional[bool] = None,
    stop: Optional[Union[str, List[str]]] = None,
    max_tokens: Optional[int] = None,
    presence_penalty: Optional[float] = None,
    frequency_penalty: Optional[float] = None,
    logit_bias: Optional[Dict[str, float]] = None,
    user: Optional[str] = None,
    response_format: Optional[Dict[str, Any]] = None,
    seed: Optional[int] = None,
    # Function calling
    tools: Optional[List[Dict[str, Any]]] = None,
    tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
    functions: Optional[List[Dict[str, Any]]] = None,
    function_call: Optional[Union[str, Dict[str, Any]]] = None,
    # LiteLLM specific parameters
    timeout: Optional[float] = None,
    api_key: Optional[str] = None,
    api_base: Optional[str] = None,
    api_version: Optional[str] = None,
    custom_llm_provider: Optional[str] = None,
    # Caching
    cache: Optional[Dict[str, Any]] = None,
    # Provider-specific overrides
    **kwargs
) -> Union[ModelResponse, Iterator[ModelResponseStream]]
    """
    Create a chat completion using any supported LLM provider.

    Args:
        model (str): Model identifier (e.g., "gpt-4", "claude-3-sonnet-20240229")
        messages (List[Dict[str, Any]]): Conversation messages in OpenAI format
        temperature (Optional[float]): Sampling temperature (0.0 to 2.0)
        max_tokens (Optional[int]): Maximum tokens to generate
        stream (Optional[bool]): Enable streaming response
        tools (Optional[List[Dict[str, Any]]]): Available function tools
        tool_choice (Optional[Union[str, Dict[str, Any]]]): Tool selection strategy
        timeout (Optional[float]): Request timeout in seconds
        api_key (Optional[str]): Provider API key override
        custom_llm_provider (Optional[str]): Force specific provider

    Returns:
        Union[ModelResponse, Iterator[ModelResponseStream]]: Completion response or stream

    Raises:
        AuthenticationError: Invalid API key or authentication failure
        RateLimitError: Rate limit exceeded
        ContextWindowExceededError: Input exceeds model's context window
        InvalidRequestError: Invalid parameters or model not found
    """
```

### Async Chat Completion

Asynchronous version of the completion function for concurrent processing and improved performance.

```python { .api }
async def acompletion(
    model: str,
    messages: List[Dict[str, Any]],
    # All same parameters as completion()
    **kwargs
) -> Union[ModelResponse, AsyncIterator[ModelResponseStream]]
    """
    Async version of completion() for concurrent LLM requests.

    Args:
        Same as completion() function

    Returns:
        Union[ModelResponse, AsyncIterator[ModelResponseStream]]: Async completion response or stream
    """
```

### Text Completion

Legacy text completion interface for prompt-based models and compatibility with older model types.

```python { .api }
def text_completion(
    model: str,
    prompt: str,
    # Standard parameters
    temperature: Optional[float] = None,
    max_tokens: Optional[int] = None,
    top_p: Optional[float] = None,
    frequency_penalty: Optional[float] = None,
    presence_penalty: Optional[float] = None,
    stop: Optional[Union[str, List[str]]] = None,
    stream: Optional[bool] = None,
    n: Optional[int] = None,
    logit_bias: Optional[Dict[str, float]] = None,
    # LiteLLM specific
    timeout: Optional[float] = None,
    api_key: Optional[str] = None,
    api_base: Optional[str] = None,
    custom_llm_provider: Optional[str] = None,
    **kwargs
) -> Union[TextCompletionResponse, Iterator[TextCompletionResponse]]
    """
    Create a text completion using prompt-based models.

    Args:
        model (str): Model identifier
        prompt (str): Input text prompt
        temperature (Optional[float]): Sampling temperature
        max_tokens (Optional[int]): Maximum tokens to generate
        stream (Optional[bool]): Enable streaming response
        stop (Optional[Union[str, List[str]]]): Stop sequences
        timeout (Optional[float]): Request timeout in seconds

    Returns:
        Union[TextCompletionResponse, Iterator[TextCompletionResponse]]: Text completion response
    """
```

### Async Text Completion

Asynchronous version of text completion for concurrent processing.

```python { .api }
async def atext_completion(
    model: str,
    prompt: str,
    **kwargs
) -> Union[TextCompletionResponse, AsyncIterator[TextCompletionResponse]]
    """
    Async version of text_completion() for concurrent requests.

    Args:
        Same as text_completion() function

    Returns:
        Union[TextCompletionResponse, AsyncIterator[TextCompletionResponse]]: Async text completion response
    """
```

## Message Format

```python { .api }
class Message:
    """OpenAI-compatible message format"""
    role: Literal["system", "user", "assistant", "tool"]
    content: Optional[Union[str, List[Dict[str, Any]]]]
    name: Optional[str] = None
    tool_calls: Optional[List[ChatCompletionMessageToolCall]] = None
    tool_call_id: Optional[str] = None

class ChatCompletionMessageToolCall:
    id: str
    type: Literal["function"]
    function: Function

class Function:
    name: str
    arguments: str
```

## Response Objects

```python { .api }
class ModelResponse(BaseLiteLLMOpenAIResponseObject):
    """Main completion response object"""
    id: str
    choices: List[Choices]
    created: int
    model: Optional[str] = None
    object: str = "chat.completion"
    system_fingerprint: Optional[str] = None
    usage: Optional[Usage] = None
    _hidden_params: HiddenParams = {}
    _response_ms: Optional[float] = None

class ModelResponseStream(BaseLiteLLMOpenAIResponseObject):
    """Streaming completion response chunk"""
    id: str
    choices: List[StreamingChoices]
    created: int
    model: Optional[str] = None
    object: str = "chat.completion.chunk"

class Choices:
    finish_reason: Optional[Literal["stop", "length", "function_call", "tool_calls", "content_filter"]] = None
    index: int = 0
    message: Optional[Message] = None
    logprobs: Optional[ChoiceLogprobs] = None

class StreamingChoices:
    finish_reason: Optional[str] = None
    index: int = 0
    delta: Optional[Delta] = None
    logprobs: Optional[ChoiceLogprobs] = None

class Delta:
    content: Optional[str] = None
    role: Optional[str] = None
    tool_calls: Optional[List[ChatCompletionMessageToolCall]] = None

class Usage:
    prompt_tokens: int
    completion_tokens: Optional[int] = None
    total_tokens: int
    cache_creation_input_tokens: Optional[int] = None
    cache_read_input_tokens: Optional[int] = None

class TextCompletionResponse(BaseLiteLLMOpenAIResponseObject):
    """Text completion response object"""
    id: str
    choices: List[TextChoices]
    created: int
    model: Optional[str] = None
    object: str = "text_completion"
    usage: Optional[Usage] = None

class TextChoices:
    finish_reason: Optional[str] = None
    index: int = 0
    logprobs: Optional[TextChoicesLogprobs] = None
    text: str
```

## Usage Examples

### Basic Chat Completion

```python
import litellm

# Simple completion
response = litellm.completion(
    model="gpt-4",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What is machine learning?"}
    ]
)

print(response.choices[0].message.content)
```

### Streaming Completion

```python
response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Write a story about AI"}],
    stream=True,
    max_tokens=500
)

for chunk in response:
    if chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="")
```

### Function Calling

```python
tools = [{
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Get current weather for a location",
        "parameters": {
            "type": "object",
            "properties": {
                "location": {"type": "string", "description": "City name"}
            },
            "required": ["location"]
        }
    }
}]

response = litellm.completion(
    model="gpt-4",
    messages=[{"role": "user", "content": "What's the weather in Paris?"}],
    tools=tools,
    tool_choice="auto"
)

if response.choices[0].message.tool_calls:
    tool_call = response.choices[0].message.tool_calls[0]
    print(f"Function: {tool_call.function.name}")
    print(f"Arguments: {tool_call.function.arguments}")
```

### Multi-modal Completion

```python
# Vision model with image
messages = [{
    "role": "user",
    "content": [
        {"type": "text", "text": "What's in this image?"},
        {
            "type": "image_url",
            "image_url": {"url": "https://example.com/image.jpg"}
        }
    ]
}]

response = litellm.completion(
    model="gpt-4-vision-preview",
    messages=messages
)
```

### Async Completion with Multiple Models

```python
import asyncio

async def test_multiple_models():
    tasks = [
        litellm.acompletion(
            model="gpt-4",
            messages=[{"role": "user", "content": "Hello from GPT-4"}]
        ),
        litellm.acompletion(
            model="claude-3-sonnet-20240229",
            messages=[{"role": "user", "content": "Hello from Claude"}]
        )
    ]

    responses = await asyncio.gather(*tasks)
    for i, response in enumerate(responses):
        print(f"Response {i}: {response.choices[0].message.content}")

asyncio.run(test_multiple_models())
```

### Provider-specific Parameters

```python
# Anthropic Claude with specific parameters
response = litellm.completion(
    model="claude-3-sonnet-20240229",
    messages=[{"role": "user", "content": "Explain quantum physics"}],
    max_tokens=1000,
    temperature=0.7,
    # Anthropic-specific
    top_k=40,
    custom_llm_provider="anthropic"
)

# Cohere with custom parameters
response = litellm.completion(
    model="command-nightly",
    messages=[{"role": "user", "content": "Write a summary"}],
    # Cohere-specific
    p=0.75,
    k=0,
    custom_llm_provider="cohere"
)
```

### Error Handling

```python
try:
    response = litellm.completion(
        model="gpt-4",
        messages=[{"role": "user", "content": "Hello"}],
        timeout=30
    )
except litellm.RateLimitError as e:
    print(f"Rate limit exceeded: {e}")
except litellm.AuthenticationError as e:
    print(f"Authentication failed: {e}")
except litellm.ContextWindowExceededError as e:
    print(f"Context window exceeded: {e}")
except Exception as e:
    print(f"Unexpected error: {e}")
```