CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-ollama

The official Python client library for Ollama, providing both synchronous and asynchronous interfaces for text generation, chat, embeddings, and model management.

Pending
Overview
Eval results
Files

docs/data-types.md

Data Types and Models

Comprehensive Pydantic data models for all API interactions including requests, responses, configuration options, and type definitions for messages, tools, and images. All models inherit from SubscriptableBaseModel allowing both attribute and dictionary-style access.

Capabilities

Configuration Models

Configuration classes for customizing model behavior and performance parameters.

class Options:
    """Configuration options for model parameters.

    All fields are optional: a value of None means "use the server-side
    default" for that parameter. Load-time options take effect when the
    model is loaded into memory; runtime options apply per request.
    """
    
    # Load-time options
    numa: bool | None = None
    num_ctx: int | None = None
    num_batch: int | None = None
    num_gpu: int | None = None
    main_gpu: int | None = None
    low_vram: bool | None = None
    f16_kv: bool | None = None
    logits_all: bool | None = None
    vocab_only: bool | None = None
    use_mmap: bool | None = None
    use_mlock: bool | None = None
    embedding_only: bool | None = None
    num_thread: int | None = None
    
    # Runtime options
    num_keep: int | None = None
    seed: int | None = None
    num_predict: int | None = None
    top_k: int | None = None
    top_p: float | None = None
    tfs_z: float | None = None
    typical_p: float | None = None
    repeat_last_n: int | None = None
    temperature: float | None = None
    repeat_penalty: float | None = None
    presence_penalty: float | None = None
    frequency_penalty: float | None = None
    mirostat: int | None = None
    mirostat_tau: float | None = None
    mirostat_eta: float | None = None
    penalize_newline: bool | None = None
    stop: list[str] | None = None  # sequences that terminate generation

Message and Communication Models

Data structures for chat messages, tool interactions, and multimodal content.

class Image:
    """Image data for multimodal models.

    Wraps a single image supplied as a base64-encoded string, raw bytes,
    or a filesystem path.
    """
    # Underlying image payload; interpretation depends on its type.
    value: str | bytes | Path
    
    def __init__(self, value: str | bytes | Path):
        """
        Create an Image object.
        
        Parameters:
        - value: Image as base64 string, raw bytes, or file path
        """

class Message:
    """Chat message structure."""
    # Speaker role, e.g. 'user' or 'assistant'.
    role: str
    content: str | None = None
    thinking: str | None = None
    images: list[Image] | None = None  # attachments for multimodal models
    tool_name: str | None = None
    tool_calls: list[ToolCall] | None = None  # tool invocations requested by the model
    
    class ToolCall:
        """Tool call specification within a message."""
        function: Function
        
        class Function:
            """Function call details."""
            name: str        # name of the function to invoke
            arguments: dict  # keyword arguments for the call

class Tool:
    """Tool definition for function calling."""
    type: str = 'function'
    function: Function | None = None
    
    class Function:
        """Function specification for tools."""
        name: str
        description: str | None = None  # natural-language description shown to the model
        parameters: Parameters | None = None
        
        class Parameters:
            """Function parameters schema (JSON-Schema-style)."""
            type: str = 'object'
            properties: dict[str, Property] | None = None
            required: list[str] | None = None  # names of required properties
            
            class Property:
                """Parameter property definition."""
                type: str  # JSON type name, e.g. 'string'
                description: str | None = None
                enum: list | None = None  # allowed values, when constrained

Utility Functions

Functions for converting Python functions to Tool objects for function calling.

def convert_function_to_tool(func: callable) -> Tool:
    """
    Convert a Python function to a Tool object for function calling.
    
    This function analyzes the function signature and docstring to create
    a properly formatted Tool object that can be used with chat operations.
    
    Parameters:
    - func (callable): Python function to convert to tool
    
    Returns:
    Tool: Tool object with function schema derived from the input function
    
    NOTE(review): this helper is imported from the private module
    ``ollama._utils`` in the usage examples — confirm the import path
    against the installed ollama version, as private modules may change.
    """

Response Models

Response data structures returned by Ollama API operations.

class GenerateResponse:
    """Response from generate requests."""
    model: str
    created_at: str
    response: str  # the generated text
    thinking: str | None = None
    context: list[int] | None = None  # presumably token context for follow-up requests — confirm
    done: bool
    done_reason: str | None = None
    # Duration fields are reported in nanoseconds (divide by 1_000_000 for ms).
    total_duration: int | None = None
    load_duration: int | None = None
    prompt_eval_count: int | None = None
    prompt_eval_duration: int | None = None
    eval_count: int | None = None  # number of tokens generated
    eval_duration: int | None = None

class ChatResponse:
    """Response from chat requests."""
    model: str
    created_at: str
    message: Message  # assistant reply; may carry tool_calls
    done: bool
    done_reason: str | None = None
    # Duration fields are reported in nanoseconds (divide by 1_000_000 for ms).
    total_duration: int | None = None
    load_duration: int | None = None
    prompt_eval_count: int | None = None
    prompt_eval_duration: int | None = None
    eval_count: int | None = None  # number of tokens generated
    eval_duration: int | None = None

class EmbedResponse:
    """Response from embed requests."""
    model: str
    embeddings: list[list[float]]  # one vector per input item
    total_duration: int | None = None  # nanoseconds
    load_duration: int | None = None   # nanoseconds
    prompt_eval_count: int | None = None

class EmbeddingsResponse:
    """Response from embeddings requests (deprecated; EmbedResponse is the current form)."""
    # Single embedding vector for the prompt.
    embedding: list[float]

class StatusResponse:
    """Generic status response."""
    # Status message; None when the server sent no status field.
    status: str | None = None

class ProgressResponse(StatusResponse):
    """Progress response for streaming operations."""
    completed: int | None = None  # progress completed so far (units not specified here)
    total: int | None = None      # total amount expected
    digest: str | None = None     # digest of the item being transferred

Model Information Models

Data structures for model metadata, capabilities, and system information.

class ListResponse:
    """Response from list requests."""
    models: list[Model]
    
    class Model:
        """Model information in list responses."""
        name: str
        model: str
        modified_at: str
        size: int  # model size on disk, in bytes
        digest: str
        details: Details | None = None
        expires_at: str | None = None
        size_vram: int | None = None  # bytes resident in VRAM, when loaded
        
        class Details:
            """Detailed model information."""
            parent_model: str | None = None
            format: str | None = None
            family: str | None = None
            families: list[str] | None = None
            parameter_size: str | None = None
            quantization_level: str | None = None

class ShowResponse:
    """Response from show requests."""
    modified_at: str | None = None
    template: str | None = None
    modelfile: str | None = None
    license: str | None = None
    details: Details | None = None
    modelinfo: dict | None = None
    parameters: str | None = None
    capabilities: Capabilities | None = None
    
    class Details:
        """Detailed model specifications."""
        parent_model: str | None = None
        format: str | None = None
        family: str | None = None
        families: list[str] | None = None
        parameter_size: str | None = None
        quantization_level: str | None = None
    
    class Capabilities:
        """Model capability information; each flag is True when supported."""
        completion: bool | None = None
        chat: bool | None = None
        embedding: bool | None = None
        vision: bool | None = None
        tools: bool | None = None

class ProcessResponse:
    """Response from ps (process status) requests."""
    models: list[Model]
    
    class Model:
        """Running model information."""
        name: str
        model: str
        size: int       # total model size, in bytes
        size_vram: int  # bytes currently held in VRAM
        expires_at: str
        digest: str

Exception Models

Exception classes for error handling and status reporting.

class RequestError(Exception):
    """Raised for request-related failures.

    The human-readable description is available both as ``str(exc)``
    and via the ``error`` attribute.
    """

    # Error description, mirroring the Exception message.
    error: str

    def __init__(self, error: str):
        """Initialize with *error*, the error description."""
        super().__init__(error)
        self.error = error

class ResponseError(Exception):
    """Raised for response-related failures.

    Carries the error text and, when known, the HTTP status code
    (``-1`` when no status is available).
    """

    # Error description, mirroring the Exception message.
    error: str
    # HTTP status code of the failed response; -1 when unknown.
    status_code: int

    def __init__(self, error: str, status_code: int = -1):
        """Initialize with *error* text and an optional HTTP *status_code*."""
        super().__init__(error)
        self.error = error
        self.status_code = status_code

Usage Examples

Working with Messages

from ollama import Message, Image

# Text message
message = Message(
    role='user',
    content='Hello, how are you?'
)

# Message with images (multimodal)
# Image accepts a base64 string, raw bytes, or a file path.
message = Message(
    role='user',
    content='What do you see in this image?',
    images=[Image(value='path/to/image.jpg')]
)

# Assistant message with tool calls
# ToolCall and its Function are nested classes on Message.
message = Message(
    role='assistant',
    content='I need to get the weather information.',
    tool_calls=[
        Message.ToolCall(
            function=Message.ToolCall.Function(
                name='get_weather',
                arguments={'city': 'Paris'}
            )
        )
    ]
)

Configuration with Options

from ollama import Options

# Conservative generation
# Low temperature plus a repeat penalty keeps output focused and stable.
conservative_options = Options(
    temperature=0.1,
    top_p=0.9,
    repeat_penalty=1.1,
    stop=['</end>', '\n\n']
)

# Creative generation
# Higher temperature and wider sampling produce more varied output.
creative_options = Options(
    temperature=0.9,
    top_p=0.95,
    top_k=40,
    repeat_penalty=1.0
)

# Performance optimization
# These are load-time options; they take effect when the model loads.
performance_options = Options(
    num_ctx=4096,
    num_batch=512,
    num_gpu=2,
    f16_kv=True,
    use_mmap=True
)

Tool Definition

from ollama import Tool

# Manual tool definition
# Function, Parameters, and Property are nested classes on Tool.
weather_tool = Tool(
    type='function',
    function=Tool.Function(
        name='get_weather',
        description='Get current weather for a city',
        parameters=Tool.Function.Parameters(
            type='object',
            properties={
                'city': Tool.Function.Parameters.Property(
                    type='string',
                    description='City name'
                ),
                'units': Tool.Function.Parameters.Property(
                    type='string',
                    description='Temperature units',
                    enum=['celsius', 'fahrenheit']
                )
            },
            required=['city']
        )
    )
)

# Automatic tool creation from function
# NOTE(review): ollama._utils is a private module; confirm this import
# path against the installed ollama version before relying on it.
from ollama._utils import convert_function_to_tool

def calculate_area(length: float, width: float) -> float:
    """Calculate the area of a rectangle."""
    return length * width

area_tool = convert_function_to_tool(calculate_area)

Response Handling

import ollama

# Generate with response handling
response = ollama.generate(
    model='llama3.2',
    prompt='Tell me a joke'
)

# Access response data
# Dict-style access works because response models inherit SubscriptableBaseModel.
print(f"Model: {response['model']}")
print(f"Response: {response['response']}")
print(f"Tokens: {response.get('eval_count', 'Unknown')}")

if response['done']:
    # total_duration is in nanoseconds.
    duration = response.get('total_duration', 0) / 1_000_000  # Convert to ms
    print(f"Duration: {duration:.2f}ms")

# Chat response handling
chat_response = ollama.chat(
    model='llama3.2',
    messages=[{'role': 'user', 'content': 'Hello!'}]
)

message = chat_response['message']
print(f"Role: {message['role']}")
print(f"Content: {message['content']}")

# Handle tool calls if present
if message.get('tool_calls'):
    for tool_call in message['tool_calls']:
        func = tool_call['function']
        print(f"Tool: {func['name']}")
        print(f"Args: {func['arguments']}")

Error Handling

import ollama
from ollama import RequestError, ResponseError

# Catch the library's specific exceptions before the generic fallback;
# except clauses are tried in order.
try:
    response = ollama.generate(
        model='nonexistent-model',
        prompt='Hello'
    )
except RequestError as e:
    print(f"Request error: {e.error}")
except ResponseError as e:
    # status_code is -1 when no HTTP status is available.
    print(f"Response error: {e.error} (status: {e.status_code})")
except Exception as e:
    print(f"Unexpected error: {e}")

Model Information Processing

import ollama

# List and process models
models_response = ollama.list()
for model in models_response['models']:
    # size is in bytes; convert to gigabytes for display.
    size_gb = model['size'] / (1024**3)
    print(f"{model['name']}: {size_gb:.1f}GB")
    
    if model.get('details'):
        details = model['details']
        print(f"  Family: {details.get('family', 'Unknown')}")
        print(f"  Parameters: {details.get('parameter_size', 'Unknown')}")

# Show detailed model info
model_info = ollama.show('llama3.2')
if model_info.get('capabilities'):
    caps = model_info['capabilities']
    features = []
    if caps.get('chat'): features.append('chat')
    if caps.get('embedding'): features.append('embeddings')
    if caps.get('vision'): features.append('vision')
    if caps.get('tools'): features.append('tools')
    print(f"Capabilities: {', '.join(features)}")

# Process running models
# size_vram is in bytes per running model.
running = ollama.ps()
total_vram = sum(model['size_vram'] for model in running['models'])
print(f"Total VRAM usage: {total_vram / (1024**3):.1f}GB")

Embedding Analysis

import ollama
import numpy as np

# Generate embeddings, one request per text.
texts = ["Hello world", "Goodbye world", "Machine learning"]
responses = []

for text in texts:
    response = ollama.embed(
        model='nomic-embed-text',
        input=text
    )
    responses.append(response)

# Work with embedding vectors
# embed returns a list of vectors; with a single input, take element 0.
embeddings = [resp['embeddings'][0] for resp in responses]

# Calculate cosine similarities
def cosine_similarity(a, b):
    """Return the cosine similarity between vectors a and b."""
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

# Compare every unordered pair of texts exactly once.
for i, text1 in enumerate(texts):
    for j, text2 in enumerate(texts[i+1:], i+1):
        sim = cosine_similarity(embeddings[i], embeddings[j])
        print(f"{text1} <-> {text2}: {sim:.3f}")

Install with Tessl CLI

npx tessl i tessl/pypi-ollama

docs

clients.md

convenience-functions.md

data-types.md

index.md

tile.json