The official Python client library for Ollama, providing both synchronous and asynchronous interfaces for text generation, chat, embeddings, and model management.
—
Module-level functions that provide direct access to Ollama functionality without requiring explicit client instantiation. These functions use a default Client instance created when importing the ollama module, making them ideal for simple scripts and quick interactions.
Type Imports: The signatures in this documentation use these typing imports:
from typing import Union, Sequence, Mapping, Callable, Literal, Any, Iterator
from pydantic.json_schema import JsonSchemaValue

Direct access to Ollama's text generation capabilities using module-level functions.
def generate(
    model: str = '',
    prompt: str = '',
    suffix: str | None = None,
    *,
    system: str | None = None,
    template: str | None = None,
    context: Sequence[int] | None = None,
    stream: bool = False,
    think: bool | None = None,
    raw: bool | None = None,
    format: str | None = None,
    images: Sequence[Union[str, bytes, Image]] | None = None,
    options: Union[Mapping[str, Any], Options] | None = None,
    keep_alive: Union[float, str] | None = None
) -> Union[GenerateResponse, Iterator[GenerateResponse]]:
    """
    Generate text from a prompt using the default client.

    Parameters:
    - model (str): Model name to use for generation. Default: ''
    - prompt (str): Text prompt for generation. Default: ''
    - suffix (str, optional): Text to append after the generated output
    - system (str, optional): System message to set context
    - template (str, optional): Custom prompt template
    - context (Sequence[int], optional): Token context from a previous generation
    - stream (bool): Return streaming responses. Default: False
    - think (bool, optional): Enable thinking mode for reasoning models
    - raw (bool, optional): Use raw mode (no template processing)
    - format (str, optional): Response format ('json', etc.)
    - images (Sequence[str | bytes | Image], optional): Images for multimodal models
    - options (Mapping | Options, optional): Model configuration options
    - keep_alive (float | str, optional): How long to keep the model loaded
    Returns:
    GenerateResponse, or Iterator[GenerateResponse] when stream=True
    """
def chat(
    model: str = '',
    messages: Sequence[Union[Mapping[str, Any], Message]] = None,
    *,
    tools: Sequence[Union[Mapping[str, Any], Tool, Callable]] = None,
    stream: bool = False,
    think: Union[bool, Literal['low', 'medium', 'high']] = None,
    format: Union[Literal['', 'json'], JsonSchemaValue] = None,
    options: Union[Mapping[str, Any], Options] = None,
    keep_alive: Union[float, str] = None
) -> Union[ChatResponse, Iterator[ChatResponse]]:
    """
    Chat with a model using conversation history.

    Parameters:
    - model (str): Model name to use for chat. Default: ''
    - messages (Sequence[Mapping | Message], optional): Conversation messages. Default: None
    - tools (Sequence[Mapping | Tool | Callable], optional): Available tools for function calling
    - stream (bool): Return streaming responses. Default: False
    - think (bool | 'low' | 'medium' | 'high', optional): Enable thinking mode for reasoning models
    - format ('', 'json', or JsonSchemaValue, optional): Response format constraint
    - options (Mapping | Options, optional): Model configuration options
    - keep_alive (float | str, optional): How long to keep the model loaded
    Returns:
    ChatResponse, or Iterator[ChatResponse] when stream=True
    """

# Generate vector embeddings from text inputs for semantic analysis and search.
def embed(
    model: str = '',
    input: Union[str, Sequence[str]] = '',
    truncate: bool | None = None,
    options: Options | None = None,
    keep_alive: str | None = None
) -> EmbedResponse:
    """
    Generate embeddings for input text(s).

    Parameters:
    - model (str): Embedding model name. Default: ''
    - input (str | Sequence[str]): Text or list of texts to embed. Default: ''
    - truncate (bool, optional): Truncate inputs that exceed model limits
    - options (Options, optional): Model configuration options
    - keep_alive (str, optional): How long to keep the model loaded
    Returns:
    EmbedResponse containing embedding vectors
    """
def embeddings(
    model: str,
    prompt: str,
    options: Options = None,
    keep_alive: str = None
) -> EmbeddingsResponse:
    """
    Generate an embedding for a single prompt.

    Deprecated: use embed() instead.

    Parameters:
    - model (str): Embedding model name
    - prompt (str): Text to embed
    - options (Options, optional): Model configuration options
    - keep_alive (str, optional): How long to keep the model loaded
    Returns:
    EmbeddingsResponse containing a single embedding vector
    """

# Download, upload, create, and manage Ollama models with progress tracking.
def pull(
    model: str,
    *,
    insecure: bool = False,
    stream: bool = False
) -> ProgressResponse | Iterator[ProgressResponse]:
    """
    Download a model from a model library.

    Parameters:
    - model (str): Model name to download
    - insecure (bool): Allow insecure connections to the library. Default: False
    - stream (bool): Return streaming progress updates. Default: False
    Returns:
    ProgressResponse, or Iterator[ProgressResponse] when stream=True
    """
def push(
    model: str,
    *,
    insecure: bool = False,
    stream: bool = False
) -> ProgressResponse | Iterator[ProgressResponse]:
    """
    Upload a model to a model library.

    Parameters:
    - model (str): Model name to upload
    - insecure (bool): Allow insecure connections to the library. Default: False
    - stream (bool): Return streaming progress updates. Default: False
    Returns:
    ProgressResponse, or Iterator[ProgressResponse] when stream=True
    """
def create(
    model: str,
    quantize: str | None = None,
    from_: str | None = None,
    files: dict | None = None,
    adapters: dict[str, str] | None = None,
    template: str | None = None,
    license: Union[str, list[str]] | None = None,
    system: str | None = None,
    parameters: dict | None = None,
    messages: list[Message] | None = None,
    *,
    stream: bool = False
) -> ProgressResponse | Iterator[ProgressResponse]:
    """
    Create a new model from a Modelfile.

    Parameters:
    - model (str): Name for the new model
    - quantize (str, optional): Quantization method
    - from_ (str, optional): Base model to inherit from
    - files (dict, optional): Additional files to include
    - adapters (dict[str, str], optional): Model adapters to apply
    - template (str, optional): Prompt template
    - license (str | list[str], optional): Model license(s)
    - system (str, optional): System message template
    - parameters (dict, optional): Model parameters
    - messages (list[Message], optional): Example messages
    - stream (bool): Return streaming progress updates. Default: False
    Returns:
    ProgressResponse, or Iterator[ProgressResponse] when stream=True
    """
def delete(
    model: str
) -> StatusResponse:
    """
    Delete a model.

    Parameters:
    - model (str): Name of the model to delete
    Returns:
    StatusResponse with deletion status
    """
def copy(
    source: str,
    destination: str
) -> StatusResponse:
    """
    Copy a model under a new name.

    Parameters:
    - source (str): Source model name
    - destination (str): Destination model name
    Returns:
    StatusResponse with copy status
    """

# Retrieve information about available and running models.
def list() -> ListResponse:
    """
    List available models.

    Note: the name mirrors the Ollama API and shadows the builtin list().

    Returns:
    ListResponse containing model information
    """
def show(
    model: str
) -> ShowResponse:
    """
    Show information about a specific model.

    Parameters:
    - model (str): Model name to show information for
    Returns:
    ShowResponse with detailed model information
    """
def ps() -> ProcessResponse:
    """
    List running models and their resource usage.

    Returns:
    ProcessResponse with the currently running models
    """

import ollama
# Simple text generation
response = ollama.generate(
    model='llama3.2',
    prompt='Explain quantum computing in simple terms'
)
print(response['response'])

# With options: higher temperature for more creative output
response = ollama.generate(
    model='llama3.2',
    prompt='Write a poem about autumn',
    options={'temperature': 0.8, 'top_p': 0.9}
)
print(response['response'])

import ollama
# Stream text generation: iterate chunks as they arrive
print("Generating story...")
for chunk in ollama.generate(
    model='llama3.2',
    prompt='Tell me a story about a brave knight',
    stream=True
):
    if chunk.get('response'):
        print(chunk['response'], end='', flush=True)
print()  # New line after streaming

import ollama
# Simple chat
messages = [
    {'role': 'user', 'content': 'Hello! What can you help me with?'}
]
response = ollama.chat(
    model='llama3.2',
    messages=messages
)
print(response['message']['content'])

# Multi-turn conversation: feed the assistant reply back into the history
messages.append(response['message'])
messages.append({
    'role': 'user',
    'content': 'Tell me about machine learning'
})
response = ollama.chat(
    model='llama3.2',
    messages=messages
)
print(response['message']['content'])

import ollama
from ollama import Tool

def get_current_time() -> str:
    """Get the current time."""
    from datetime import datetime
    return datetime.now().strftime("%Y-%m-%d %H:%M:%S")

def calculate_tip(bill_amount: float, tip_percentage: float = 15.0) -> float:
    """Calculate tip amount."""
    return bill_amount * (tip_percentage / 100)

# Create tools from functions
# NOTE(review): _utils is a private module; chat() also accepts plain callables
# directly in `tools` per its signature — confirm before relying on this import.
from ollama._utils import convert_function_to_tool
tools = [
    convert_function_to_tool(get_current_time),
    convert_function_to_tool(calculate_tip)
]

# Chat with function calling
response = ollama.chat(
    model='llama3.2',
    messages=[{
        'role': 'user',
        'content': 'What time is it and what would be a 20% tip on a $45 bill?'
    }],
    tools=tools
)
print(response['message']['content'])

import ollama
import numpy as np

# Generate embeddings for documents
documents = [
    "The cat sat on the mat",
    "A dog played in the park",
    "The feline rested on the rug",
    "Birds flew in the sky"
]

# Get embeddings
embeddings = []
for doc in documents:
    response = ollama.embed(
        model='nomic-embed-text',
        input=doc
    )
    embeddings.append(response['embeddings'][0])

# Query embedding
query = "A cat lying down"
query_response = ollama.embed(
    model='nomic-embed-text',
    input=query
)
query_embedding = query_response['embeddings'][0]

# Calculate cosine similarity between the query and each document
similarities = []
for emb in embeddings:
    similarity = np.dot(query_embedding, emb) / (
        np.linalg.norm(query_embedding) * np.linalg.norm(emb)
    )
    similarities.append(similarity)

# Find most similar document
most_similar_idx = np.argmax(similarities)
print(f"Most similar: {documents[most_similar_idx]}")
print(f"Similarity: {similarities[most_similar_idx]:.3f}")

import ollama
# List available models
models = ollama.list()
print("Available models:")
for model in models['models']:
    print(f"- {model['name']} ({model['size']})")

# Pull a new model, reporting download progress
print("Downloading model...")
for progress in ollama.pull('phi3', stream=True):
    if progress.get('completed') and progress.get('total'):
        percent = (progress['completed'] / progress['total']) * 100
        print(f"Progress: {percent:.1f}%", end='\r')
print("\nDownload complete!")

# Show model details
details = ollama.show('phi3')
print(f"Model: {details.get('details', {}).get('family', 'Unknown')}")
print(f"Parameters: {details.get('details', {}).get('parameter_size', 'Unknown')}")

# Check running models
running = ollama.ps()
if running['models']:
    print("Currently running models:")
    for model in running['models']:
        print(f"- {model['name']}")
else:
    print("No models currently running")

import ollama
from ollama import Image

# Analyze an image
response = ollama.generate(
    model='llava',  # or another vision-capable model
    prompt='Describe what you see in this image',
    images=[Image(value='path/to/image.jpg')]
)
print(response['response'])

# Compare multiple images in a single chat message
response = ollama.chat(
    model='llava',
    messages=[{
        'role': 'user',
        'content': 'What are the differences between these two images?',
        'images': [
            Image(value='image1.jpg'),
            Image(value='image2.jpg')
        ]
    }]
)
print(response['message']['content'])

# Install with Tessl CLI:
npx tessl i tessl/pypi-ollama