Library to easily interface with LLM API providers
—
Core completion functionality that forms the foundation of LiteLLM's unified interface. These functions provide chat completion, text completion, and streaming support across 100+ LLM providers with OpenAI-compatible parameters.
Primary function for conversational AI interactions using the messages format. Supports all OpenAI parameters and provider-specific extensions.
def completion(
model: str,
messages: List[Dict[str, Any]],
# Standard OpenAI parameters
temperature: Optional[float] = None,
top_p: Optional[float] = None,
n: Optional[int] = None,
stream: Optional[bool] = None,
stop: Optional[Union[str, List[str]]] = None,
max_tokens: Optional[int] = None,
presence_penalty: Optional[float] = None,
frequency_penalty: Optional[float] = None,
logit_bias: Optional[Dict[str, float]] = None,
user: Optional[str] = None,
response_format: Optional[Dict[str, Any]] = None,
seed: Optional[int] = None,
# Function calling
tools: Optional[List[Dict[str, Any]]] = None,
tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
functions: Optional[List[Dict[str, Any]]] = None,
function_call: Optional[Union[str, Dict[str, Any]]] = None,
# LiteLLM specific parameters
timeout: Optional[float] = None,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
api_version: Optional[str] = None,
custom_llm_provider: Optional[str] = None,
# Caching
cache: Optional[Dict[str, Any]] = None,
# Provider-specific overrides
**kwargs
) -> Union[ModelResponse, Iterator[ModelResponseStream]]
"""
Create a chat completion using any supported LLM provider.
Args:
model (str): Model identifier (e.g., "gpt-4", "claude-3-sonnet-20240229")
messages (List[Dict[str, Any]]): Conversation messages in OpenAI format
temperature (Optional[float]): Sampling temperature (0.0 to 2.0)
max_tokens (Optional[int]): Maximum tokens to generate
stream (Optional[bool]): Enable streaming response
tools (Optional[List[Dict[str, Any]]]): Available function tools
tool_choice (Optional[Union[str, Dict[str, Any]]]): Tool selection strategy
timeout (Optional[float]): Request timeout in seconds
api_key (Optional[str]): Provider API key override
custom_llm_provider (Optional[str]): Force specific provider
Returns:
Union[ModelResponse, Iterator[ModelResponseStream]]: Completion response or stream
Raises:
AuthenticationError: Invalid API key or authentication failure
RateLimitError: Rate limit exceeded
ContextWindowExceededError: Input exceeds model's context window
InvalidRequestError: Invalid parameters or model not found
"""
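Because the parameters are OpenAI-compatible across providers, switching providers is usually just a matter of changing the model string, with the matching API key available (for example OPENAI_API_KEY or ANTHROPIC_API_KEY in the environment). A minimal sketch:
import litellm

# Same call shape for two different providers; only the model string changes.
# Assumes the relevant provider API keys are set as environment variables.
openai_response = litellm.completion(
    model="gpt-4",
    messages=[{"role": "user", "content": "Say hello"}],
)
anthropic_response = litellm.completion(
    model="claude-3-sonnet-20240229",
    messages=[{"role": "user", "content": "Say hello"}],
)
print(openai_response.choices[0].message.content)
print(anthropic_response.choices[0].message.content)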
"""Asynchronous version of the completion function for concurrent processing and improved performance.
async def acompletion(
model: str,
messages: List[Dict[str, Any]],
# All same parameters as completion()
**kwargs
) -> Union[ModelResponse, AsyncIterator[ModelResponseStream]]
"""
Async version of completion() for concurrent LLM requests.
Args:
Same as completion() function
Returns:
Union[ModelResponse, AsyncIterator[ModelResponseStream]]: Async completion response or stream
"""
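A single awaited call mirrors the synchronous version; a minimal sketch (a fuller concurrent example using asyncio.gather appears in the usage examples below):
import asyncio
import litellm

async def main():
    # Awaiting a single completion; the API key is read from the environment.
    response = await litellm.acompletion(
        model="gpt-4",
        messages=[{"role": "user", "content": "Hello"}],
    )
    print(response.choices[0].message.content)

asyncio.run(main())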
"""Legacy text completion interface for prompt-based models and compatibility with older model types.
def text_completion(
model: str,
prompt: str,
# Standard parameters
temperature: Optional[float] = None,
max_tokens: Optional[int] = None,
top_p: Optional[float] = None,
frequency_penalty: Optional[float] = None,
presence_penalty: Optional[float] = None,
stop: Optional[Union[str, List[str]]] = None,
stream: Optional[bool] = None,
n: Optional[int] = None,
logit_bias: Optional[Dict[str, float]] = None,
# LiteLLM specific
timeout: Optional[float] = None,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
custom_llm_provider: Optional[str] = None,
**kwargs
) -> Union[TextCompletionResponse, Iterator[TextCompletionResponse]]
"""
Create a text completion using prompt-based models.
Args:
model (str): Model identifier
prompt (str): Input text prompt
temperature (Optional[float]): Sampling temperature
max_tokens (Optional[int]): Maximum tokens to generate
stream (Optional[bool]): Enable streaming response
stop (Optional[Union[str, List[str]]]): Stop sequences
timeout (Optional[float]): Request timeout in seconds
Returns:
Union[TextCompletionResponse, Iterator[TextCompletionResponse]]: Text completion response
"""
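A minimal sketch of prompt-based usage; the model name here is illustrative and any completions-style model should work the same way:
import litellm

# Prompt-in, text-out; gpt-3.5-turbo-instruct is an illustrative model choice.
response = litellm.text_completion(
    model="gpt-3.5-turbo-instruct",
    prompt="Once upon a time",
    max_tokens=50,
    temperature=0.7,
)
print(response.choices[0].text)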
"""Asynchronous version of text completion for concurrent processing.
async def atext_completion(
model: str,
prompt: str,
**kwargs
) -> Union[TextCompletionResponse, AsyncIterator[TextCompletionResponse]]
"""
Async version of text_completion() for concurrent requests.
Args:
Same as text_completion() function
Returns:
Union[TextCompletionResponse, AsyncIterator[TextCompletionResponse]]: Async text completion response
"""class Message:
"""OpenAI-compatible message format"""
role: Literal["system", "user", "assistant", "tool"]
content: Optional[Union[str, List[Dict[str, Any]]]]
name: Optional[str] = None
tool_calls: Optional[List[ChatCompletionMessageToolCall]] = None
tool_call_id: Optional[str] = None
class ChatCompletionMessageToolCall:
id: str
type: Literal["function"]
function: Function
class Function:
name: str
arguments: str
class ModelResponse(BaseLiteLLMOpenAIResponseObject):
"""Main completion response object"""
id: str
choices: List[Choices]
created: int
model: Optional[str] = None
object: str = "chat.completion"
system_fingerprint: Optional[str] = None
usage: Optional[Usage] = None
_hidden_params: HiddenParams = {}
_response_ms: Optional[float] = None
class ModelResponseStream(BaseLiteLLMOpenAIResponseObject):
"""Streaming completion response chunk"""
id: str
choices: List[StreamingChoices]
created: int
model: Optional[str] = None
object: str = "chat.completion.chunk"
class Choices:
finish_reason: Optional[Literal["stop", "length", "function_call", "tool_calls", "content_filter"]] = None
index: int = 0
message: Optional[Message] = None
logprobs: Optional[ChoiceLogprobs] = None
class StreamingChoices:
finish_reason: Optional[str] = None
index: int = 0
delta: Optional[Delta] = None
logprobs: Optional[ChoiceLogprobs] = None
class Delta:
content: Optional[str] = None
role: Optional[str] = None
tool_calls: Optional[List[ChatCompletionMessageToolCall]] = None
class Usage:
prompt_tokens: int
completion_tokens: Optional[int] = None
total_tokens: int
cache_creation_input_tokens: Optional[int] = None
cache_read_input_tokens: Optional[int] = None
class TextCompletionResponse(BaseLiteLLMOpenAIResponseObject):
"""Text completion response object"""
id: str
choices: List[TextChoices]
created: int
model: Optional[str] = None
object: str = "text_completion"
usage: Optional[Usage] = None
class TextChoices:
finish_reason: Optional[str] = None
index: int = 0
logprobs: Optional[TextChoicesLogprobs] = None
text: str

import litellm
# Simple completion
response = litellm.completion(
model="gpt-4",
messages=[
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "What is machine learning?"}
]
)
print(response.choices[0].message.content)
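Beyond the message content, the ModelResponse fields documented above (model, usage, finish_reason) can be read off the same object; a short sketch continuing the example, noting that usage may be None for some providers:
# Inspect metadata on the ModelResponse returned above.
print(response.model)
print(response.choices[0].finish_reason)
if response.usage:
    print(response.usage.prompt_tokens, response.usage.completion_tokens, response.usage.total_tokens)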
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": "Write a story about AI"}],
stream=True,
max_tokens=500
)
for chunk in response:
if chunk.choices[0].delta.content:
print(chunk.choices[0].delta.content, end="")tools = [{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get current weather for a location",
"parameters": {
"type": "object",
"properties": {
"location": {"type": "string", "description": "City name"}
},
"required": ["location"]
}
}
}]
response = litellm.completion(
model="gpt-4",
messages=[{"role": "user", "content": "What's the weather in Paris?"}],
tools=tools,
tool_choice="auto"
)
if response.choices[0].message.tool_calls:
tool_call = response.choices[0].message.tool_calls[0]
print(f"Function: {tool_call.function.name}")
print(f"Arguments: {tool_call.function.arguments}")# Vision model with image

# Vision model with image
messages = [{
"role": "user",
"content": [
{"type": "text", "text": "What's in this image?"},
{
"type": "image_url",
"image_url": {"url": "https://example.com/image.jpg"}
}
]
}]
response = litellm.completion(
model="gpt-4-vision-preview",
messages=messages
)

# Concurrent requests with acompletion
import asyncio
async def test_multiple_models():
tasks = [
litellm.acompletion(
model="gpt-4",
messages=[{"role": "user", "content": "Hello from GPT-4"}]
),
litellm.acompletion(
model="claude-3-sonnet-20240229",
messages=[{"role": "user", "content": "Hello from Claude"}]
)
]
responses = await asyncio.gather(*tasks)
for i, response in enumerate(responses):
print(f"Response {i}: {response.choices[0].message.content}")
asyncio.run(test_multiple_models())

# Anthropic Claude with specific parameters
response = litellm.completion(
model="claude-3-sonnet-20240229",
messages=[{"role": "user", "content": "Explain quantum physics"}],
max_tokens=1000,
temperature=0.7,
# Anthropic-specific
top_k=40,
custom_llm_provider="anthropic"
)
# Cohere with custom parameters
response = litellm.completion(
model="command-nightly",
messages=[{"role": "user", "content": "Write a summary"}],
# Cohere-specific
p=0.75,
k=0,
custom_llm_provider="cohere"
)
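The api_base override can also point the same interface at a self-hosted, OpenAI-compatible endpoint; a hedged sketch using an Ollama model as an illustration (model name and URL are assumptions about your local setup):
# Local / self-hosted endpoint via api_base; assumes Ollama is running locally
# with the llama2 model pulled.
response = litellm.completion(
    model="ollama/llama2",
    messages=[{"role": "user", "content": "Hello"}],
    api_base="http://localhost:11434",
)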

# Error handling
try:
response = litellm.completion(
model="gpt-4",
messages=[{"role": "user", "content": "Hello"}],
timeout=30
)
except litellm.RateLimitError as e:
print(f"Rate limit exceeded: {e}")
except litellm.AuthenticationError as e:
print(f"Authentication failed: {e}")
except litellm.ContextWindowExceededError as e:
print(f"Context window exceeded: {e}")
except Exception as e:
print(f"Unexpected error: {e}")Install with Tessl CLI
npx tessl i tessl/pypi-litellm