or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

core-completion.md exceptions.md index.md other-apis.md providers.md router.md utilities.md

docs/core-completion.md

0

# Core Completion API

1

2

Fundamental completion functionality that forms the foundation of LiteLLM's unified interface. These functions provide chat completion, text completion, and streaming support across 100+ LLM providers with OpenAI-compatible parameters.

3

4

## Capabilities

5

6

### Chat Completion

7

8

Primary function for conversational AI interactions using the messages format. Supports all OpenAI parameters and provider-specific extensions.

9

10

```python { .api }

11

def completion(

12

model: str,

13

messages: List[Dict[str, Any]],

14

# Standard OpenAI parameters

15

temperature: Optional[float] = None,

16

top_p: Optional[float] = None,

17

n: Optional[int] = None,

18

stream: Optional[bool] = None,

19

stop: Optional[Union[str, List[str]]] = None,

20

max_tokens: Optional[int] = None,

21

presence_penalty: Optional[float] = None,

22

frequency_penalty: Optional[float] = None,

23

logit_bias: Optional[Dict[str, float]] = None,

24

user: Optional[str] = None,

25

response_format: Optional[Dict[str, Any]] = None,

26

seed: Optional[int] = None,

27

# Function calling

28

tools: Optional[List[Dict[str, Any]]] = None,

29

tool_choice: Optional[Union[str, Dict[str, Any]]] = None,

30

functions: Optional[List[Dict[str, Any]]] = None,

31

function_call: Optional[Union[str, Dict[str, Any]]] = None,

32

# LiteLLM specific parameters

33

timeout: Optional[float] = None,

34

api_key: Optional[str] = None,

35

api_base: Optional[str] = None,

36

api_version: Optional[str] = None,

37

custom_llm_provider: Optional[str] = None,

38

# Caching (stream is declared above with the standard OpenAI parameters)

39

40

cache: Optional[Dict[str, Any]] = None,

41

# Provider-specific overrides

42

**kwargs

43

) -> Union[ModelResponse, Iterator[ModelResponseStream]]

44

"""

45

Create a chat completion using any supported LLM provider.

46

47

Args:

48

model (str): Model identifier (e.g., "gpt-4", "claude-3-sonnet-20240229")

49

messages (List[Dict[str, Any]]): Conversation messages in OpenAI format

50

temperature (Optional[float]): Sampling temperature (0.0 to 2.0)

51

max_tokens (Optional[int]): Maximum tokens to generate

52

stream (Optional[bool]): Enable streaming response

53

tools (Optional[List[Dict[str, Any]]]): Available function tools

54

tool_choice (Optional[Union[str, Dict[str, Any]]]): Tool selection strategy

55

timeout (Optional[float]): Request timeout in seconds

56

api_key (Optional[str]): Provider API key override

57

custom_llm_provider (Optional[str]): Force specific provider

58

59

Returns:

60

Union[ModelResponse, Iterator[ModelResponseStream]]: Completion response or stream

61

62

Raises:

63

AuthenticationError: Invalid API key or authentication failure

64

RateLimitError: Rate limit exceeded

65

ContextWindowExceededError: Input exceeds model's context window

66

InvalidRequestError: Invalid parameters or model not found

67

"""

68

```

69

70

### Async Chat Completion

71

72

Asynchronous version of the completion function for concurrent processing and improved performance.

73

74

```python { .api }

75

async def acompletion(

76

model: str,

77

messages: List[Dict[str, Any]],

78

# All same parameters as completion()

79

**kwargs

80

) -> Union[ModelResponse, AsyncIterator[ModelResponseStream]]

81

"""

82

Async version of completion() for concurrent LLM requests.

83

84

Args:

85

Same as completion() function

86

87

Returns:

88

Union[ModelResponse, AsyncIterator[ModelResponseStream]]: Async completion response or stream

89

"""

90

```

91

92

### Text Completion

93

94

Legacy text completion interface for prompt-based models and compatibility with older model types.

95

96

```python { .api }

97

def text_completion(

98

model: str,

99

prompt: str,

100

# Standard parameters

101

temperature: Optional[float] = None,

102

max_tokens: Optional[int] = None,

103

top_p: Optional[float] = None,

104

frequency_penalty: Optional[float] = None,

105

presence_penalty: Optional[float] = None,

106

stop: Optional[Union[str, List[str]]] = None,

107

stream: Optional[bool] = None,

108

n: Optional[int] = None,

109

logit_bias: Optional[Dict[str, float]] = None,

110

# LiteLLM specific

111

timeout: Optional[float] = None,

112

api_key: Optional[str] = None,

113

api_base: Optional[str] = None,

114

custom_llm_provider: Optional[str] = None,

115

**kwargs

116

) -> Union[TextCompletionResponse, Iterator[TextCompletionResponse]]

117

"""

118

Create a text completion using prompt-based models.

119

120

Args:

121

model (str): Model identifier

122

prompt (str): Input text prompt

123

temperature (Optional[float]): Sampling temperature

124

max_tokens (Optional[int]): Maximum tokens to generate

125

stream (Optional[bool]): Enable streaming response

126

stop (Optional[Union[str, List[str]]]): Stop sequences

127

timeout (Optional[float]): Request timeout in seconds

128

129

Returns:

130

Union[TextCompletionResponse, Iterator[TextCompletionResponse]]: Text completion response

131

"""

132

```

133

134

### Async Text Completion

135

136

Asynchronous version of text completion for concurrent processing.

137

138

```python { .api }

139

async def atext_completion(

140

model: str,

141

prompt: str,

142

**kwargs

143

) -> Union[TextCompletionResponse, AsyncIterator[TextCompletionResponse]]

144

"""

145

Async version of text_completion() for concurrent requests.

146

147

Args:

148

Same as text_completion() function

149

150

Returns:

151

Union[TextCompletionResponse, AsyncIterator[TextCompletionResponse]]: Async text completion response

152

"""

153

```

154

155

## Message Format

156

157

```python { .api }

158

class Message:

159

"""OpenAI-compatible message format"""

160

role: Literal["system", "user", "assistant", "tool"]

161

content: Optional[Union[str, List[Dict[str, Any]]]]

162

name: Optional[str] = None

163

tool_calls: Optional[List[ChatCompletionMessageToolCall]] = None

164

tool_call_id: Optional[str] = None

165

166

class ChatCompletionMessageToolCall:

167

id: str

168

type: Literal["function"]

169

function: Function

170

171

class Function:

172

name: str

173

arguments: str

174

```

175

176

## Response Objects

177

178

```python { .api }

179

class ModelResponse(BaseLiteLLMOpenAIResponseObject):

180

"""Main completion response object"""

181

id: str

182

choices: List[Choices]

183

created: int

184

model: Optional[str] = None

185

object: str = "chat.completion"

186

system_fingerprint: Optional[str] = None

187

usage: Optional[Usage] = None

188

_hidden_params: HiddenParams = {}

189

_response_ms: Optional[float] = None

190

191

class ModelResponseStream(BaseLiteLLMOpenAIResponseObject):

192

"""Streaming completion response chunk"""

193

id: str

194

choices: List[StreamingChoices]

195

created: int

196

model: Optional[str] = None

197

object: str = "chat.completion.chunk"

198

199

class Choices:

200

finish_reason: Optional[Literal["stop", "length", "function_call", "tool_calls", "content_filter"]] = None

201

index: int = 0

202

message: Optional[Message] = None

203

logprobs: Optional[ChoiceLogprobs] = None

204

205

class StreamingChoices:

206

finish_reason: Optional[str] = None

207

index: int = 0

208

delta: Optional[Delta] = None

209

logprobs: Optional[ChoiceLogprobs] = None

210

211

class Delta:

212

content: Optional[str] = None

213

role: Optional[str] = None

214

tool_calls: Optional[List[ChatCompletionMessageToolCall]] = None

215

216

class Usage:

217

prompt_tokens: int

218

completion_tokens: Optional[int] = None

219

total_tokens: int

220

cache_creation_input_tokens: Optional[int] = None

221

cache_read_input_tokens: Optional[int] = None

222

223

class TextCompletionResponse(BaseLiteLLMOpenAIResponseObject):

224

"""Text completion response object"""

225

id: str

226

choices: List[TextChoices]

227

created: int

228

model: Optional[str] = None

229

object: str = "text_completion"

230

usage: Optional[Usage] = None

231

232

class TextChoices:

233

finish_reason: Optional[str] = None

234

index: int = 0

235

logprobs: Optional[TextChoicesLogprobs] = None

236

text: str

237

```

238

239

## Usage Examples

240

241

### Basic Chat Completion

242

243

```python

244

import litellm

245

246

# Simple completion

247

response = litellm.completion(

248

model="gpt-4",

249

messages=[

250

{"role": "system", "content": "You are a helpful assistant."},

251

{"role": "user", "content": "What is machine learning?"}

252

]

253

)

254

255

print(response.choices[0].message.content)

256

```

257

258

### Streaming Completion

259

260

```python

261

response = litellm.completion(

262

model="gpt-3.5-turbo",

263

messages=[{"role": "user", "content": "Write a story about AI"}],

264

stream=True,

265

max_tokens=500

266

)

267

268

for chunk in response:

269

if chunk.choices[0].delta.content:

270

print(chunk.choices[0].delta.content, end="")

271

```

272

273

### Function Calling

274

275

```python

276

tools = [{

277

"type": "function",

278

"function": {

279

"name": "get_weather",

280

"description": "Get current weather for a location",

281

"parameters": {

282

"type": "object",

283

"properties": {

284

"location": {"type": "string", "description": "City name"}

285

},

286

"required": ["location"]

287

}

288

}

289

}]

290

291

response = litellm.completion(

292

model="gpt-4",

293

messages=[{"role": "user", "content": "What's the weather in Paris?"}],

294

tools=tools,

295

tool_choice="auto"

296

)

297

298

if response.choices[0].message.tool_calls:

299

tool_call = response.choices[0].message.tool_calls[0]

300

print(f"Function: {tool_call.function.name}")

301

print(f"Arguments: {tool_call.function.arguments}")

302

```

303

304

### Multi-modal Completion

305

306

```python

307

# Vision model with image

308

messages = [{

309

"role": "user",

310

"content": [

311

{"type": "text", "text": "What's in this image?"},

312

{

313

"type": "image_url",

314

"image_url": {"url": "https://example.com/image.jpg"}

315

}

316

]

317

}]

318

319

response = litellm.completion(

320

model="gpt-4-vision-preview",

321

messages=messages

322

)

323

```

324

325

### Async Completion with Multiple Models

326

327

```python

328

import asyncio

329

330

async def test_multiple_models():

331

tasks = [

332

litellm.acompletion(

333

model="gpt-4",

334

messages=[{"role": "user", "content": "Hello from GPT-4"}]

335

),

336

litellm.acompletion(

337

model="claude-3-sonnet-20240229",

338

messages=[{"role": "user", "content": "Hello from Claude"}]

339

)

340

]

341

342

responses = await asyncio.gather(*tasks)

343

for i, response in enumerate(responses):

344

print(f"Response {i}: {response.choices[0].message.content}")

345

346

asyncio.run(test_multiple_models())

347

```

348

349

### Provider-specific Parameters

350

351

```python

352

# Anthropic Claude with specific parameters

353

response = litellm.completion(

354

model="claude-3-sonnet-20240229",

355

messages=[{"role": "user", "content": "Explain quantum physics"}],

356

max_tokens=1000,

357

temperature=0.7,

358

# Anthropic-specific

359

top_k=40,

360

custom_llm_provider="anthropic"

361

)

362

363

# Cohere with custom parameters

364

response = litellm.completion(

365

model="command-nightly",

366

messages=[{"role": "user", "content": "Write a summary"}],

367

# Cohere-specific

368

p=0.75,

369

k=0,

370

custom_llm_provider="cohere"

371

)

372

```

373

374

### Error Handling

375

376

```python

377

try:

378

response = litellm.completion(

379

model="gpt-4",

380

messages=[{"role": "user", "content": "Hello"}],

381

timeout=30

382

)

383

except litellm.RateLimitError as e:

384

print(f"Rate limit exceeded: {e}")

385

except litellm.AuthenticationError as e:

386

print(f"Authentication failed: {e}")

387

except litellm.ContextWindowExceededError as e:

388

print(f"Context window exceeded: {e}")

389

except Exception as e:

390

print(f"Unexpected error: {e}")

391

```