CtrlK
BlogDocsLog inGet started
Tessl Logo

tessl/pypi-ollama

The official Python client library for Ollama, providing both synchronous and asynchronous interfaces for text generation, chat, embeddings, and model management.

Pending
Overview
Eval results
Files

docs/clients.md

Client Operations

Complete synchronous and asynchronous client classes providing the full Ollama API with configurable hosts, custom headers, timeouts, and comprehensive error handling.

Type Imports: The signatures in this documentation use these typing imports:

from typing import Union, Sequence, Mapping, Callable, Literal, Any, Iterator
from pydantic.json_schema import JsonSchemaValue

Capabilities

Client Class (Synchronous)

Synchronous HTTP client for Ollama API operations with configurable connection settings.

class Client:
    """Synchronous HTTP client for Ollama API operations with configurable connection settings."""

    def __init__(
        self,
        host: str | None = None,
        *,
        follow_redirects: bool = True,
        timeout: Any = None,
        headers: dict[str, str] | None = None,
        **kwargs
    ):
        """
        Create a synchronous Ollama client.

        Parameters:
        - host (str, optional): Ollama server host URL. Defaults to OLLAMA_HOST env var or localhost:11434
        - follow_redirects (bool): Whether to follow HTTP redirects. Default: True
        - timeout: Request timeout configuration (e.g. an httpx.Timeout)
        - headers (dict[str, str], optional): Custom HTTP headers sent with every request
        - **kwargs: Additional httpx client arguments passed through to the underlying transport
        """

Text Generation

Generate text completions from prompts with extensive configuration options.

def generate(
    self,
    model: str = '',
    prompt: str = '',
    suffix: str | None = None,
    *,
    system: str | None = None,
    template: str | None = None,
    context: Sequence[int] | None = None,
    stream: bool = False,
    think: bool | None = None,
    raw: bool | None = None,
    format: str | None = None,
    images: Sequence[Union[str, bytes, Image]] | None = None,
    options: Union[Mapping[str, Any], Options] | None = None,
    keep_alive: Union[float, str] | None = None
) -> Union[GenerateResponse, Iterator[GenerateResponse]]:
    """
    Generate text from a prompt.

    Parameters:
    - model (str): Model name to use for generation. Default: ''
    - prompt (str): Text prompt for generation. Default: ''
    - suffix (str, optional): Text to append after generation
    - system (str, optional): System message to set context
    - template (str, optional): Custom prompt template
    - context (Sequence[int], optional): Token context from previous generation
    - stream (bool): Return streaming responses. Default: False
    - think (bool, optional): Enable thinking mode for reasoning models
    - raw (bool, optional): Use raw mode (no template processing)
    - format (str, optional): Response format ('json', etc.)
    - images (Sequence[str | bytes | Image], optional): Images for multimodal models
    - options (Mapping | Options, optional): Model configuration options
    - keep_alive (float | str, optional): Keep model loaded duration

    Returns:
    GenerateResponse, or Iterator[GenerateResponse] when stream=True
    """

Chat Operations

Conduct multi-turn conversations with context preservation and tool calling support.

def chat(
    self,
    model: str = '',
    messages: Sequence[Union[Mapping[str, Any], Message]] | None = None,
    *,
    tools: Sequence[Union[Mapping[str, Any], Tool, Callable]] | None = None,
    stream: bool = False,
    think: Union[bool, Literal['low', 'medium', 'high']] | None = None,
    format: Union[Literal['', 'json'], JsonSchemaValue] | None = None,
    options: Union[Mapping[str, Any], Options] | None = None,
    keep_alive: Union[float, str] | None = None
) -> Union[ChatResponse, Iterator[ChatResponse]]:
    """
    Chat with a model using conversation history.

    Parameters:
    - model (str): Model name to use for chat. Default: ''
    - messages (Sequence[Mapping | Message], optional): Conversation messages. Default: None
    - tools (Sequence[Mapping | Tool | Callable], optional): Available tools for function calling
    - stream (bool): Return streaming responses. Default: False
    - think (bool | 'low' | 'medium' | 'high', optional): Enable thinking mode for reasoning models
    - format ('' | 'json' | JsonSchemaValue, optional): Response format, including JSON-schema constrained output
    - options (Mapping | Options, optional): Model configuration options
    - keep_alive (float | str, optional): Keep model loaded duration

    Returns:
    ChatResponse, or Iterator[ChatResponse] when stream=True
    """

Embeddings

Generate vector embeddings from text inputs for semantic similarity and search applications.

def embed(
    self,
    model: str = '',
    input: Union[str, Sequence[str]] = '',
    truncate: bool | None = None,
    options: Options | None = None,
    keep_alive: str | None = None
) -> EmbedResponse:
    """
    Generate embeddings for input text(s).

    Parameters:
    - model (str): Embedding model name
    - input (str | Sequence[str]): Text or list of texts to embed
    - truncate (bool, optional): Truncate inputs that exceed model limits
    - options (Options, optional): Model configuration options
    - keep_alive (str, optional): Keep model loaded duration

    Returns:
    EmbedResponse containing embedding vectors (one per input)
    """

def embeddings(
    self,
    model: str,
    prompt: str,
    options: Options | None = None,
    keep_alive: str | None = None
) -> EmbeddingsResponse:
    """
    Generate embeddings for a single prompt.

    Deprecated: use embed() instead, which supports batched inputs.

    Parameters:
    - model (str): Embedding model name
    - prompt (str): Text to embed
    - options (Options, optional): Model configuration options
    - keep_alive (str, optional): Keep model loaded duration

    Returns:
    EmbeddingsResponse containing a single embedding vector
    """

Model Management

Download, upload, create, and manage Ollama models with progress tracking.

def pull(
    self,
    model: str,
    *,
    insecure: bool = False,
    stream: bool = False
) -> ProgressResponse | Iterator[ProgressResponse]:
    """
    Download a model from a model library.

    Parameters:
    - model (str): Model name to download
    - insecure (bool): Allow insecure connections to the library. Default: False
    - stream (bool): Return streaming progress updates. Default: False

    Returns:
    ProgressResponse (final status), or Iterator[ProgressResponse] when stream=True
    """

def push(
    self,
    model: str,
    *,
    insecure: bool = False,
    stream: bool = False
) -> ProgressResponse | Iterator[ProgressResponse]:
    """
    Upload a model to a model library.

    Parameters:
    - model (str): Model name to upload
    - insecure (bool): Allow insecure connections to the library. Default: False
    - stream (bool): Return streaming progress updates. Default: False

    Returns:
    ProgressResponse (final status), or Iterator[ProgressResponse] when stream=True
    """

def create(
    self,
    model: str,
    quantize: str | None = None,
    from_: str | None = None,
    files: dict | None = None,
    adapters: dict[str, str] | None = None,
    template: str | None = None,
    license: Union[str, list[str]] | None = None,
    system: str | None = None,
    parameters: dict | None = None,
    messages: list[Message] | None = None,
    *,
    stream: bool = False
) -> ProgressResponse | Iterator[ProgressResponse]:
    """
    Create a new model from a Modelfile.

    Parameters:
    - model (str): Name for the new model
    - quantize (str, optional): Quantization method
    - from_ (str, optional): Base model to inherit from
    - files (dict, optional): Additional files to include
    - adapters (dict[str, str], optional): Model adapters to apply
    - license (str | list[str], optional): Model license(s)
    - template (str, optional): Prompt template
    - system (str, optional): System message template
    - parameters (dict, optional): Model parameters
    - messages (list[Message], optional): Example messages
    - stream (bool): Return streaming progress updates. Default: False

    Returns:
    ProgressResponse (final status), or Iterator[ProgressResponse] when stream=True
    """

def create_blob(
    self,
    path: Union[str, Path]
) -> str:
    """
    Create a blob from a local file, for use during model creation.

    Parameters:
    - path (str | Path): Path to the file to upload as a blob

    Returns:
    str: Blob digest hash identifying the uploaded content
    """

def delete(
    self,
    model: str
) -> StatusResponse:
    """
    Delete a model from the local server.

    Parameters:
    - model (str): Name of the model to delete

    Returns:
    StatusResponse with deletion status
    """

def copy(
    self,
    source: str,
    destination: str
) -> StatusResponse:
    """
    Copy a model under a new name.

    Parameters:
    - source (str): Source model name
    - destination (str): Destination model name

    Returns:
    StatusResponse with copy status
    """

Model Information

Retrieve information about available and running models.

def list(
    self
) -> ListResponse:
    """
    List models available on the server.

    Returns:
    ListResponse containing per-model information
    """

def show(
    self,
    model: str
) -> ShowResponse:
    """
    Show information about a specific model.

    Parameters:
    - model (str): Model name to show information for

    Returns:
    ShowResponse with detailed model information
    """

def ps(
    self
) -> ProcessResponse:
    """
    List currently running models and their resource usage.

    Returns:
    ProcessResponse describing the running models
    """

AsyncClient Class (Asynchronous)

Asynchronous HTTP client for Ollama API operations with the same interface as Client but using async/await patterns.

class AsyncClient:
    """Asynchronous HTTP client for Ollama API operations; same interface as Client using async/await."""

    def __init__(
        self,
        host: str | None = None,
        *,
        follow_redirects: bool = True,
        timeout: Any = None,
        headers: dict[str, str] | None = None,
        **kwargs
    ):
        """
        Create an asynchronous Ollama client.

        Parameters: Same as Client class
        """

    async def generate(self, model: str = '', prompt: str = '', **kwargs):
        """Async version of Client.generate()"""

    async def chat(self, model: str = '', messages: Sequence[Union[Mapping, Message]] | None = None, **kwargs):
        """Async version of Client.chat()"""

    async def embed(self, model: str = '', input: Union[str, Sequence[str]] = '', **kwargs):
        """Async version of Client.embed()"""

    async def embeddings(self, model: str, prompt: str, **kwargs):
        """Async version of Client.embeddings() (deprecated - use embed instead)"""

    async def pull(self, model: str, **kwargs):
        """Async version of Client.pull()"""

    async def push(self, model: str, **kwargs):
        """Async version of Client.push()"""

    async def create(self, model: str, **kwargs):
        """Async version of Client.create()"""

    async def create_blob(self, path: Union[str, Path]) -> str:
        """Async version of Client.create_blob()"""

    async def delete(self, model: str) -> StatusResponse:
        """Async version of Client.delete()"""

    async def copy(self, source: str, destination: str) -> StatusResponse:
        """Async version of Client.copy()"""

    async def list(self) -> ListResponse:
        """Async version of Client.list()"""

    async def show(self, model: str) -> ShowResponse:
        """Async version of Client.show()"""

    async def ps(self) -> ProcessResponse:
        """Async version of Client.ps()"""

Usage Examples

Custom Client Configuration

from ollama import Client
import httpx

# Custom client with authentication.
# headers/timeout are forwarded to the underlying httpx transport.
client = Client(
    host='https://my-ollama-server.com',
    headers={'Authorization': 'Bearer token'},
    timeout=httpx.Timeout(30.0)
)

# Generate with custom client
response = client.generate(
    model='custom-model',
    prompt='Hello, world!',
    options={'temperature': 0.7}  # sampling options forwarded to the model
)

Streaming with Progress Tracking

from ollama import Client

client = Client()

# Stream text generation: with stream=True, generate() yields
# incremental chunks instead of a single final response.
print("Generating story...")
for chunk in client.generate(
    model='llama3.2',
    prompt='Write a short story about a robot',
    stream=True
):
    if chunk.get('response'):
        print(chunk['response'], end='', flush=True)

print("\n\nPulling model...")
# Stream model download progress.
# NOTE(review): 'completed'/'total' are presumably byte counts and may be
# absent in early progress events — hence the guard below.
for progress in client.pull('phi3', stream=True):
    if progress.get('completed') and progress.get('total'):
        percent = (progress['completed'] / progress['total']) * 100
        print(f"Progress: {percent:.1f}%")

Async Context Management

import asyncio
from ollama import AsyncClient

async def main():
    """Fan out several generate() calls concurrently with one AsyncClient."""
    # NOTE(review): assumes AsyncClient supports async context management — confirm.
    async with AsyncClient() as client:
        # Concurrent requests: build the coroutines first, then await them together.
        tasks = [
            client.generate(model='llama3.2', prompt=f'Story {i}')
            for i in range(3)
        ]
        
        responses = await asyncio.gather(*tasks)
        for i, response in enumerate(responses):
            print(f"Story {i}: {response['response'][:100]}...")

asyncio.run(main())

Install with Tessl CLI

npx tessl i tessl/pypi-ollama

docs

clients.md

convenience-functions.md

data-types.md

index.md

tile.json