tessl/pypi-litellm

Library to easily interface with LLM API providers

docs/core-completion.md

Core Completion API

The core completion functions are the foundation of LiteLLM's unified interface. They provide chat completion, text completion, and streaming support across 100+ LLM providers with OpenAI-compatible parameters.

Capabilities

Chat Completion

Primary function for conversational AI interactions using the messages format. Supports all OpenAI parameters and provider-specific extensions.

def completion(
    model: str,
    messages: List[Dict[str, Any]],
    # Standard OpenAI parameters
    temperature: Optional[float] = None,
    top_p: Optional[float] = None,
    n: Optional[int] = None,
    stream: Optional[bool] = None,
    stop: Optional[Union[str, List[str]]] = None,
    max_tokens: Optional[int] = None,
    presence_penalty: Optional[float] = None,
    frequency_penalty: Optional[float] = None,
    logit_bias: Optional[Dict[str, float]] = None,
    user: Optional[str] = None,
    response_format: Optional[Dict[str, Any]] = None,
    seed: Optional[int] = None,
    # Function calling
    tools: Optional[List[Dict[str, Any]]] = None,
    tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
    functions: Optional[List[Dict[str, Any]]] = None,
    function_call: Optional[Union[str, Dict[str, Any]]] = None,
    # LiteLLM specific parameters
    timeout: Optional[float] = None,
    api_key: Optional[str] = None,
    api_base: Optional[str] = None,
    api_version: Optional[str] = None,
    custom_llm_provider: Optional[str] = None,
    # Caching
    cache: Optional[Dict[str, Any]] = None,
    # Provider-specific overrides
    **kwargs
) -> Union[ModelResponse, Iterator[ModelResponseStream]]:
    """
    Create a chat completion using any supported LLM provider.

    Args:
        model (str): Model identifier (e.g., "gpt-4", "claude-3-sonnet-20240229")
        messages (List[Dict[str, Any]]): Conversation messages in OpenAI format
        temperature (Optional[float]): Sampling temperature (0.0 to 2.0)
        max_tokens (Optional[int]): Maximum tokens to generate
        stream (Optional[bool]): Enable streaming response
        tools (Optional[List[Dict[str, Any]]]): Available function tools
        tool_choice (Optional[Union[str, Dict[str, Any]]]): Tool selection strategy
        timeout (Optional[float]): Request timeout in seconds
        api_key (Optional[str]): Provider API key override
        custom_llm_provider (Optional[str]): Force specific provider

    Returns:
        Union[ModelResponse, Iterator[ModelResponseStream]]: Completion response or stream
    
    Raises:
        AuthenticationError: Invalid API key or authentication failure
        RateLimitError: Rate limit exceeded
        ContextWindowExceededError: Input exceeds model's context window
        InvalidRequestError: Invalid parameters or model not found
    """

Async Chat Completion

Asynchronous version of the completion function for concurrent processing and improved performance.

async def acompletion(
    model: str,
    messages: List[Dict[str, Any]],
    # All same parameters as completion()
    **kwargs
) -> Union[ModelResponse, AsyncIterator[ModelResponseStream]]:
    """
    Async version of completion() for concurrent LLM requests.

    Args:
        Same as completion() function

    Returns:
        Union[ModelResponse, AsyncIterator[ModelResponseStream]]: Async completion response or stream
    """

Text Completion

Legacy text completion interface for prompt-based models and for compatibility with older completion-style endpoints.

def text_completion(
    model: str,
    prompt: str,
    # Standard parameters
    temperature: Optional[float] = None,
    max_tokens: Optional[int] = None,
    top_p: Optional[float] = None,
    frequency_penalty: Optional[float] = None,
    presence_penalty: Optional[float] = None,
    stop: Optional[Union[str, List[str]]] = None,
    stream: Optional[bool] = None,
    n: Optional[int] = None,
    logit_bias: Optional[Dict[str, float]] = None,
    # LiteLLM specific
    timeout: Optional[float] = None,
    api_key: Optional[str] = None,
    api_base: Optional[str] = None,
    custom_llm_provider: Optional[str] = None,
    **kwargs
) -> Union[TextCompletionResponse, Iterator[TextCompletionResponse]]:
    """
    Create a text completion using prompt-based models.

    Args:
        model (str): Model identifier
        prompt (str): Input text prompt
        temperature (Optional[float]): Sampling temperature
        max_tokens (Optional[int]): Maximum tokens to generate
        stream (Optional[bool]): Enable streaming response
        stop (Optional[Union[str, List[str]]]): Stop sequences
        timeout (Optional[float]): Request timeout in seconds

    Returns:
        Union[TextCompletionResponse, Iterator[TextCompletionResponse]]: Text completion response
    """

Async Text Completion

Asynchronous version of text completion for concurrent processing.

async def atext_completion(
    model: str,
    prompt: str,
    **kwargs
) -> Union[TextCompletionResponse, AsyncIterator[TextCompletionResponse]]:
    """
    Async version of text_completion() for concurrent requests.

    Args:
        Same as text_completion() function

    Returns:
        Union[TextCompletionResponse, AsyncIterator[TextCompletionResponse]]: Async text completion response
    """

Message Format

class Message:
    """OpenAI-compatible message format"""
    role: Literal["system", "user", "assistant", "tool"]
    content: Optional[Union[str, List[Dict[str, Any]]]]
    name: Optional[str] = None
    tool_calls: Optional[List[ChatCompletionMessageToolCall]] = None
    tool_call_id: Optional[str] = None

class ChatCompletionMessageToolCall:
    id: str
    type: Literal["function"]
    function: Function

class Function:
    name: str
    arguments: str

Response Objects

class ModelResponse(BaseLiteLLMOpenAIResponseObject):
    """Main completion response object"""
    id: str
    choices: List[Choices]
    created: int
    model: Optional[str] = None
    object: str = "chat.completion"
    system_fingerprint: Optional[str] = None
    usage: Optional[Usage] = None
    _hidden_params: HiddenParams = {}
    _response_ms: Optional[float] = None

class ModelResponseStream(BaseLiteLLMOpenAIResponseObject):
    """Streaming completion response chunk"""
    id: str
    choices: List[StreamingChoices]
    created: int
    model: Optional[str] = None
    object: str = "chat.completion.chunk"

class Choices:
    finish_reason: Optional[Literal["stop", "length", "function_call", "tool_calls", "content_filter"]] = None
    index: int = 0
    message: Optional[Message] = None
    logprobs: Optional[ChoiceLogprobs] = None

class StreamingChoices:
    finish_reason: Optional[str] = None
    index: int = 0
    delta: Optional[Delta] = None
    logprobs: Optional[ChoiceLogprobs] = None

class Delta:
    content: Optional[str] = None
    role: Optional[str] = None
    tool_calls: Optional[List[ChatCompletionMessageToolCall]] = None

class Usage:
    prompt_tokens: int
    completion_tokens: Optional[int] = None
    total_tokens: int
    cache_creation_input_tokens: Optional[int] = None
    cache_read_input_tokens: Optional[int] = None

class TextCompletionResponse(BaseLiteLLMOpenAIResponseObject):
    """Text completion response object"""
    id: str
    choices: List[TextChoices]
    created: int
    model: Optional[str] = None
    object: str = "text_completion"
    usage: Optional[Usage] = None

class TextChoices:
    finish_reason: Optional[str] = None
    index: int = 0
    logprobs: Optional[TextChoicesLogprobs] = None
    text: str

Usage Examples

Basic Chat Completion

import litellm

# Simple completion
response = litellm.completion(
    model="gpt-4",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What is machine learning?"}
    ]
)

print(response.choices[0].message.content)
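
The returned ModelResponse also carries token accounting and metadata. A minimal sketch of inspecting it, using only the fields defined under Response Objects above:

# Inspect completion metadata and token usage
print(response.model)                     # resolved model name
print(response.choices[0].finish_reason)  # e.g. "stop" or "length"
if response.usage:
    print(response.usage.prompt_tokens,
          response.usage.completion_tokens,
          response.usage.total_tokens)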

Streaming Completion

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Write a story about AI"}],
    stream=True,
    max_tokens=500
)

for chunk in response:
    if chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="")
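
A stream can be consumed only once, so to keep the assembled reply, accumulate the deltas as they arrive instead of only printing them. This is a plain-Python sketch, not a LiteLLM helper:

# Accumulate streamed deltas while printing them
parts = []
for chunk in response:
    delta = chunk.choices[0].delta.content
    if delta:
        print(delta, end="")
        parts.append(delta)
full_text = "".join(parts)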

Function Calling

tools = [{
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Get current weather for a location",
        "parameters": {
            "type": "object",
            "properties": {
                "location": {"type": "string", "description": "City name"}
            },
            "required": ["location"]
        }
    }
}]

response = litellm.completion(
    model="gpt-4",
    messages=[{"role": "user", "content": "What's the weather in Paris?"}],
    tools=tools,
    tool_choice="auto"
)

if response.choices[0].message.tool_calls:
    tool_call = response.choices[0].message.tool_calls[0]
    print(f"Function: {tool_call.function.name}")
    print(f"Arguments: {tool_call.function.arguments}")

Multi-modal Completion

# Vision model with image
messages = [{
    "role": "user",
    "content": [
        {"type": "text", "text": "What's in this image?"},
        {
            "type": "image_url",
            "image_url": {"url": "https://example.com/image.jpg"}
        }
    ]
}]

response = litellm.completion(
    model="gpt-4-vision-preview",
    messages=messages
)

Async Completion with Multiple Models

import asyncio

async def test_multiple_models():
    tasks = [
        litellm.acompletion(
            model="gpt-4",
            messages=[{"role": "user", "content": "Hello from GPT-4"}]
        ),
        litellm.acompletion(
            model="claude-3-sonnet-20240229",
            messages=[{"role": "user", "content": "Hello from Claude"}]
        )
    ]
    
    responses = await asyncio.gather(*tasks)
    for i, response in enumerate(responses):
        print(f"Response {i}: {response.choices[0].message.content}")

asyncio.run(test_multiple_models())
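
By default asyncio.gather raises on the first failed task and discards the other results. Passing return_exceptions=True yields per-task outcomes instead, so one provider error does not sink the whole batch. This is standard asyncio behavior, not LiteLLM-specific:

async def gather_with_errors():
    tasks = [
        litellm.acompletion(
            model="gpt-4",
            messages=[{"role": "user", "content": "Hello"}]
        ),
        litellm.acompletion(
            model="claude-3-sonnet-20240229",
            messages=[{"role": "user", "content": "Hello"}]
        )
    ]

    # Exceptions are returned in place of results rather than raised
    results = await asyncio.gather(*tasks, return_exceptions=True)
    for i, result in enumerate(results):
        if isinstance(result, Exception):
            print(f"Request {i} failed: {result}")
        else:
            print(f"Response {i}: {result.choices[0].message.content}")

asyncio.run(gather_with_errors())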

Provider-specific Parameters

# Anthropic Claude with specific parameters
response = litellm.completion(
    model="claude-3-sonnet-20240229",
    messages=[{"role": "user", "content": "Explain quantum physics"}],
    max_tokens=1000,
    temperature=0.7,
    # Anthropic-specific
    top_k=40,
    custom_llm_provider="anthropic"
)

# Cohere with custom parameters
response = litellm.completion(
    model="command-nightly",
    messages=[{"role": "user", "content": "Write a summary"}],
    # Cohere-specific
    p=0.75,
    k=0,
    custom_llm_provider="cohere"
)
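
When a request includes an OpenAI parameter the target provider does not support, LiteLLM raises an error by default. At the time of writing it exposes a global litellm.drop_params switch to silently drop such parameters instead; check the current LiteLLM docs before relying on it:

# Drop unsupported OpenAI params instead of raising (global setting)
litellm.drop_params = True

response = litellm.completion(
    model="command-nightly",
    messages=[{"role": "user", "content": "Write a summary"}],
    logit_bias={"50256": -100}  # dropped if the provider lacks support
)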

Error Handling

try:
    response = litellm.completion(
        model="gpt-4",
        messages=[{"role": "user", "content": "Hello"}],
        timeout=30
    )
except litellm.RateLimitError as e:
    print(f"Rate limit exceeded: {e}")
except litellm.AuthenticationError as e:
    print(f"Authentication failed: {e}")
except litellm.ContextWindowExceededError as e:
    print(f"Context window exceeded: {e}")
except Exception as e:
    print(f"Unexpected error: {e}")

Install with Tessl CLI

npx tessl i tessl/pypi-litellm
