The official Python client library for Ollama, providing both synchronous and asynchronous interfaces for text generation, chat, embeddings, and model management.
—
Comprehensive Pydantic data models for all API interactions including requests, responses, configuration options, and type definitions for messages, tools, and images. All models inherit from SubscriptableBaseModel allowing both attribute and dictionary-style access.
Configuration classes for customizing model behavior and performance parameters.
class Options:
"""Configuration options for model parameters."""
# Load-time options
numa: bool = None
num_ctx: int = None
num_batch: int = None
num_gpu: int = None
main_gpu: int = None
low_vram: bool = None
f16_kv: bool = None
logits_all: bool = None
vocab_only: bool = None
use_mmap: bool = None
use_mlock: bool = None
embedding_only: bool = None
num_thread: int = None
# Runtime options
num_keep: int = None
seed: int = None
num_predict: int = None
top_k: int = None
top_p: float = None
tfs_z: float = None
typical_p: float = None
repeat_last_n: int = None
temperature: float = None
repeat_penalty: float = None
presence_penalty: float = None
frequency_penalty: float = None
mirostat: int = None
mirostat_tau: float = None
mirostat_eta: float = None
penalize_newline: bool = None
stop: list[str] = NoneData structures for chat messages, tool interactions, and multimodal content.
class Image:
"""Image data for multimodal models."""
value: str | bytes | Path
def __init__(self, value: str | bytes | Path):
"""
Create an Image object.
Parameters:
- value: Image as base64 string, raw bytes, or file path
"""
class Message:
"""Chat message structure."""
role: str
content: str = None
thinking: str = None
images: list[Image] = None
tool_name: str = None
tool_calls: list[ToolCall] = None
class ToolCall:
"""Tool call specification within a message."""
function: Function
class Function:
"""Function call details."""
name: str
arguments: dict
class Tool:
"""Tool definition for function calling."""
type: str = 'function'
function: Function = None
class Function:
"""Function specification for tools."""
name: str
description: str = None
parameters: Parameters = None
class Parameters:
"""Function parameters schema."""
type: str = 'object'
properties: dict[str, Property] = None
required: list[str] = None
class Property:
"""Parameter property definition."""
type: str
description: str = None
enum: list = NoneFunctions for converting Python functions to Tool objects for function calling.
def convert_function_to_tool(func: 'Callable[..., Any]') -> 'Tool':
    """
    Convert a Python function to a Tool object for function calling.

    This function analyzes the function signature and docstring to create
    a properly formatted Tool object that can be used with chat operations.

    Parameters:
    - func (Callable): Python function to convert to tool

    Returns:
    Tool: Tool object with function schema derived from the input function
    """
    # Documentation stub: the implementation lives in ollama._utils
    # (see the example below: `from ollama._utils import convert_function_to_tool`).
    ...

# Response data structures returned by Ollama API operations.
class GenerateResponse:
"""Response from generate requests."""
model: str
created_at: str
response: str
thinking: str = None
context: list[int] = None
done: bool
done_reason: str = None
total_duration: int = None
load_duration: int = None
prompt_eval_count: int = None
prompt_eval_duration: int = None
eval_count: int = None
eval_duration: int = None
class ChatResponse:
"""Response from chat requests."""
model: str
created_at: str
message: Message
done: bool
done_reason: str = None
total_duration: int = None
load_duration: int = None
prompt_eval_count: int = None
prompt_eval_duration: int = None
eval_count: int = None
eval_duration: int = None
class EmbedResponse:
"""Response from embed requests."""
model: str
embeddings: list[list[float]]
total_duration: int = None
load_duration: int = None
prompt_eval_count: int = None
class EmbeddingsResponse:
    """Response from embeddings requests (deprecated)."""

    # Single embedding vector; EmbedResponse (plural `embeddings`) appears to
    # be the current replacement — confirm against the client API.
    embedding: list[float]
class StatusResponse:
"""Generic status response."""
status: str = None
class ProgressResponse(StatusResponse):
"""Progress response for streaming operations."""
completed: int = None
total: int = None
digest: str = NoneData structures for model metadata, capabilities, and system information.
class ListResponse:
"""Response from list requests."""
models: list[Model]
class Model:
"""Model information in list responses."""
name: str
model: str
modified_at: str
size: int
digest: str
details: Details = None
expires_at: str = None
size_vram: int = None
class Details:
"""Detailed model information."""
parent_model: str = None
format: str = None
family: str = None
families: list[str] = None
parameter_size: str = None
quantization_level: str = None
class ShowResponse:
"""Response from show requests."""
modified_at: str = None
template: str = None
modelfile: str = None
license: str = None
details: Details = None
modelinfo: dict = None
parameters: str = None
capabilities: Capabilities = None
class Details:
"""Detailed model specifications."""
parent_model: str = None
format: str = None
family: str = None
families: list[str] = None
parameter_size: str = None
quantization_level: str = None
class Capabilities:
"""Model capability information."""
completion: bool = None
chat: bool = None
embedding: bool = None
vision: bool = None
tools: bool = None
class ProcessResponse:
    """Response from ps (process status) requests."""

    class Model:
        """Running model information."""

        name: str
        model: str
        size: int
        size_vram: int
        expires_at: str
        digest: str

    models: list[Model]

# Exception classes for error handling and status reporting.
class RequestError(Exception):
    """Exception for request-related errors."""

    # Human-readable error description.
    error: str

    def __init__(self, error: str):
        """
        Create a RequestError.

        Parameters:
        - error: Error description
        """
        self.error = error
        # Pass the message to Exception so str(e) renders it.
        super().__init__(error)
class ResponseError(Exception):
    """Exception for response-related errors."""

    # Human-readable error description.
    error: str
    # HTTP status code; -1 signals "no status available" per the default.
    status_code: int

    def __init__(self, error: str, status_code: int = -1):
        """
        Create a ResponseError.

        Parameters:
        - error: Error description
        - status_code: HTTP status code (-1 when unknown)
        """
        self.error = error
        self.status_code = status_code
        super().__init__(error)

from ollama import Message, Image
# Text message
message = Message(
    role='user',
    content='Hello, how are you?'
)

# Message with images (multimodal)
message = Message(
    role='user',
    content='What do you see in this image?',
    images=[Image(value='path/to/image.jpg')]
)

# Assistant message with tool calls
message = Message(
    role='assistant',
    content='I need to get the weather information.',
    tool_calls=[
        Message.ToolCall(
            function=Message.ToolCall.Function(
                name='get_weather',
                arguments={'city': 'Paris'}
            )
        )
    ]
)

from ollama import Options
# Conservative generation
conservative_options = Options(
    temperature=0.1,
    top_p=0.9,
    repeat_penalty=1.1,
    stop=['</end>', '\n\n']
)

# Creative generation
creative_options = Options(
    temperature=0.9,
    top_p=0.95,
    top_k=40,
    repeat_penalty=1.0
)

# Performance optimization
performance_options = Options(
    num_ctx=4096,
    num_batch=512,
    num_gpu=2,
    f16_kv=True,
    use_mmap=True
)

from ollama import Tool
# Manual tool definition
weather_tool = Tool(
    type='function',
    function=Tool.Function(
        name='get_weather',
        description='Get current weather for a city',
        parameters=Tool.Function.Parameters(
            type='object',
            properties={
                'city': Tool.Function.Parameters.Property(
                    type='string',
                    description='City name'
                ),
                'units': Tool.Function.Parameters.Property(
                    type='string',
                    description='Temperature units',
                    enum=['celsius', 'fahrenheit']
                )
            },
            required=['city']
        )
    )
)

# Automatic tool creation from function
from ollama._utils import convert_function_to_tool

def calculate_area(length: float, width: float) -> float:
    """Calculate the area of a rectangle."""
    return length * width

area_tool = convert_function_to_tool(calculate_area)

import ollama
# Generate with response handling
response = ollama.generate(
    model='llama3.2',
    prompt='Tell me a joke'
)

# Access response data
print(f"Model: {response['model']}")
print(f"Response: {response['response']}")
print(f"Tokens: {response.get('eval_count', 'Unknown')}")

if response['done']:
    duration = response.get('total_duration', 0) / 1_000_000  # Convert to ms
    print(f"Duration: {duration:.2f}ms")

# Chat response handling
chat_response = ollama.chat(
    model='llama3.2',
    messages=[{'role': 'user', 'content': 'Hello!'}]
)

message = chat_response['message']
print(f"Role: {message['role']}")
print(f"Content: {message['content']}")

# Handle tool calls if present
if message.get('tool_calls'):
    for tool_call in message['tool_calls']:
        func = tool_call['function']
        print(f"Tool: {func['name']}")
        print(f"Args: {func['arguments']}")

import ollama
from ollama import RequestError, ResponseError

try:
    response = ollama.generate(
        model='nonexistent-model',
        prompt='Hello'
    )
except RequestError as e:
    print(f"Request error: {e.error}")
except ResponseError as e:
    print(f"Response error: {e.error} (status: {e.status_code})")
except Exception as e:
    print(f"Unexpected error: {e}")

import ollama
# List and process models
models_response = ollama.list()
for model in models_response['models']:
    size_gb = model['size'] / (1024**3)
    print(f"{model['name']}: {size_gb:.1f}GB")
    if model.get('details'):
        details = model['details']
        print(f" Family: {details.get('family', 'Unknown')}")
        print(f" Parameters: {details.get('parameter_size', 'Unknown')}")

# Show detailed model info
model_info = ollama.show('llama3.2')
if model_info.get('capabilities'):
    caps = model_info['capabilities']
    features = []
    if caps.get('chat'): features.append('chat')
    if caps.get('embedding'): features.append('embeddings')
    if caps.get('vision'): features.append('vision')
    if caps.get('tools'): features.append('tools')
    print(f"Capabilities: {', '.join(features)}")

# Process running models
running = ollama.ps()
total_vram = sum(model['size_vram'] for model in running['models'])
print(f"Total VRAM usage: {total_vram / (1024**3):.1f}GB")

import ollama
import numpy as np

# Generate embeddings
texts = ["Hello world", "Goodbye world", "Machine learning"]
responses = []
for text in texts:
    response = ollama.embed(
        model='nomic-embed-text',
        input=text
    )
    responses.append(response)

# Work with embedding vectors
embeddings = [resp['embeddings'][0] for resp in responses]

# Calculate cosine similarities
def cosine_similarity(a, b):
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

for i, text1 in enumerate(texts):
    for j, text2 in enumerate(texts[i+1:], i+1):
        sim = cosine_similarity(embeddings[i], embeddings[j])
        print(f"{text1} <-> {text2}: {sim:.3f}")

# Install with Tessl CLI
npx tessl i tessl/pypi-ollama