The official Python client library for Ollama, providing both synchronous and asynchronous interfaces for text generation, chat, embeddings, and model management.
```bash
npx @tessl/cli install tessl/pypi-ollama@0.5.0
```

The library provides both synchronous and asynchronous interfaces for text generation, chat interactions, embeddings, and model management. Built on httpx for HTTP operations and pydantic for data validation, it offers comprehensive functionality for integrating with local or remote Ollama instances.
```bash
pip install ollama
```

```python
import ollama
from ollama import Client, AsyncClient
```

For data types and models:
```python
from ollama import (
    Message, Options, Tool, Image,
    GenerateResponse, ChatResponse, EmbedResponse,
    RequestError, ResponseError
)
```

For utility functions:
```python
from ollama._utils import convert_function_to_tool
```

For type annotations (when needed):
```python
from typing import Union, Sequence, Mapping, Callable, Literal, Any
```

```python
import ollama

# Generate text
response = ollama.generate(
    model='llama3.2',
    prompt='Tell me about artificial intelligence'
)
print(response['response'])

# Chat with the model
messages = [
    {'role': 'user', 'content': 'What is the capital of France?'}
]
response = ollama.chat(
    model='llama3.2',
    messages=messages
)
print(response['message']['content'])

# Create embeddings
response = ollama.embed(
    model='nomic-embed-text',
    input=['Hello world', 'Goodbye world']
)
print(response['embeddings'])

# List available models
models = ollama.list()
for model in models['models']:
    print(model['model'])
```

```python
from ollama import Client

# Create a client instance
client = Client(host='http://localhost:11434')

# Generate text with streaming
for chunk in client.generate(
    model='llama3.2',
    prompt='Explain quantum computing',
    stream=True
):
    print(chunk['response'], end='', flush=True)

# Chat with function calling
from ollama._utils import convert_function_to_tool

def get_weather(city: str) -> str:
    """Get the weather for a city."""
    return f"The weather in {city} is sunny, 22°C"

response = client.chat(
    model='llama3.2',
    messages=[
        {'role': 'user', 'content': 'What is the weather in Paris?'}
    ],
    tools=[convert_function_to_tool(get_weather)]
)
```

```python
import asyncio
from ollama import AsyncClient

async def main():
    client = AsyncClient()

    # Async generate
    response = await client.generate(
        model='llama3.2',
        prompt='Write a haiku about programming'
    )
    print(response['response'])

    # Async streaming
    async for chunk in await client.chat(
        model='llama3.2',
        messages=[{'role': 'user', 'content': 'Tell me a story'}],
        stream=True
    ):
        print(chunk['message']['content'], end='', flush=True)

asyncio.run(main())
```

The ollama client library is built around these key components:
- `Client` (sync) and `AsyncClient` (async) provide the core API interface.

The dual architecture allows both simple module-level usage (`ollama.generate()`) and advanced client-based usage (`Client().generate()`), depending on your needs.
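For example, the same chat request can be issued either way; this is a minimal sketch assuming a local Ollama server with the `llama3.2` model already pulled:

```python
import ollama
from ollama import Client

messages = [{'role': 'user', 'content': 'Say hello in one word.'}]

# Module-level usage: a shared default client talks to the local server
quick = ollama.chat(model='llama3.2', messages=messages)

# Client-based usage: an explicit client you can configure and reuse
client = Client(host='http://localhost:11434')
configured = client.chat(model='llama3.2', messages=messages)

print(quick['message']['content'], configured['message']['content'])
```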
Complete synchronous and asynchronous client classes provide the full Ollama API, with configurable hosts, custom headers, timeouts, and comprehensive error handling.
```python
class Client:
    def __init__(self, host: str = None, **kwargs): ...
    def generate(self, model: str = '', prompt: str = '', **kwargs): ...
    def chat(self, model: str = '', messages: Sequence[Union[Mapping, Message]] = None, **kwargs): ...
    def embed(self, model: str = '', input: Union[str, Sequence[str]] = '', **kwargs): ...
    def pull(self, model: str, **kwargs): ...
    def list(self): ...

class AsyncClient:
    def __init__(self, host: str = None, **kwargs): ...
    async def generate(self, model: str = '', prompt: str = '', **kwargs): ...
    async def chat(self, model: str = '', messages: Sequence[Union[Mapping, Message]] = None, **kwargs): ...
    async def embed(self, model: str = '', input: Union[str, Sequence[str]] = '', **kwargs): ...
    async def pull(self, model: str, **kwargs): ...
    async def list(self): ...
```
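As one illustration of that configuration surface, the sketch below builds a client with a custom host, an extra header, and a request timeout (additional keyword arguments are forwarded to the underlying httpx client), then handles the library's two exception types; the header name and timeout value are arbitrary examples:

```python
from ollama import Client, RequestError, ResponseError

# Example configuration values for illustration only
client = Client(
    host='http://localhost:11434',
    headers={'x-example-header': 'demo'},
    timeout=30.0,
)

try:
    response = client.generate(model='llama3.2', prompt='Hello!')
    print(response['response'])
except RequestError as e:
    # Raised client-side, e.g. for invalid or missing request arguments
    print('Bad request:', e)
except ResponseError as e:
    # Raised when the server returns an error status, e.g. a missing model
    print(f'Server error {e.status_code}: {e.error}')
```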
Module-level functions give direct access to Ollama functionality without requiring explicit client instantiation; they are backed by a default client instance.

```python
def generate(model: str = '', prompt: str = '', **kwargs): ...
def chat(model: str = '', messages: Sequence[Union[Mapping, Message]] = None, **kwargs): ...
def embed(model: str = '', input: Union[str, Sequence[str]] = '', **kwargs): ...
def pull(model: str, **kwargs): ...
def push(model: str, **kwargs): ...
def create(model: str, **kwargs): ...
def delete(model: str): ...
def list(): ...
def copy(source: str, destination: str): ...
def show(model: str): ...
def ps(): ...
```
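The model-management helpers in this list follow the same pattern; a brief sketch, assuming the model names used here exist on your Ollama server:

```python
import ollama

# Download a model (pull yields progress updates when stream=True)
for progress in ollama.pull('llama3.2', stream=True):
    print(progress['status'])

# Inspect a model and see what is currently loaded
info = ollama.show('llama3.2')
running = ollama.ps()

# Copy under a new tag, then remove the copy
ollama.copy('llama3.2', 'llama3.2-backup')
ollama.delete('llama3.2-backup')
```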
Comprehensive Pydantic data models cover all API interactions, including requests, responses, configuration options, and type definitions for messages, tools, and images.

```python
class Message:
    role: str
    content: str
    images: list[Image] = None
    tool_calls: list[ToolCall] = None

class Options:
    temperature: float = None
    top_p: float = None
    num_predict: int = None
    # ... many more configuration options

class GenerateResponse:
    response: str
    context: list[int] = None
    done: bool
    # ... metadata fields

class ChatResponse:
    message: Message
    done: bool
    # ... metadata fields
```
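These models can be used in place of plain dictionaries; a small sketch, assuming `llama3.2` is available locally:

```python
from ollama import chat, Message, Options

# Typed request objects instead of raw dicts
messages = [Message(role='user', content='Name one prime number.')]
options = Options(temperature=0.2, num_predict=64)

response = chat(model='llama3.2', messages=messages, options=options)

# Responses are Pydantic models: attribute access and dict-style access both work
print(response.message.content)
print(response['message']['content'])
```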