The official Python client library for Ollama, providing both synchronous and asynchronous interfaces for text generation, chat, embeddings, and model management.
—
Module-level functions that provide direct access to Ollama functionality without requiring explicit client instantiation. These functions use a default Client instance created when importing the ollama module, making them ideal for simple scripts and quick interactions.
Type Imports: The signatures in this documentation use these typing imports:
from typing import Union, Sequence, Mapping, Callable, Literal, Any, Iterator
from pydantic.json_schema import JsonSchemaValue

Direct access to Ollama's text generation capabilities using module-level functions.
def generate(
    model: str = '',
    prompt: str = '',
    suffix: str | None = None,
    *,
    system: str | None = None,
    template: str | None = None,
    context: Sequence[int] | None = None,
    stream: bool = False,
    think: bool | None = None,
    raw: bool | None = None,
    format: str | None = None,
    images: Sequence[Union[str, bytes, Image]] | None = None,
    options: Union[Mapping[str, Any], Options] | None = None,
    keep_alive: Union[float, str] | None = None
) -> Union[GenerateResponse, Iterator[GenerateResponse]]:
    """
    Generate text from a prompt using the default client.

    Parameters:
    - model (str): Model name to use for generation. Default: ''
    - prompt (str): Text prompt for generation. Default: ''
    - suffix (str, optional): Text to append after the generated output
    - system (str, optional): System message to set context
    - template (str, optional): Custom prompt template
    - context (Sequence[int], optional): Token context from a previous generation
    - stream (bool): Return streaming responses. Default: False
    - think (bool, optional): Enable thinking mode for reasoning models
    - raw (bool, optional): Use raw mode (no template processing)
    - format (str, optional): Response format ('json', etc.)
    - images (Sequence[str | bytes | Image], optional): Images for multimodal models
    - options (Mapping | Options, optional): Model configuration options
    - keep_alive (float | str, optional): How long to keep the model loaded
    Returns:
    GenerateResponse, or Iterator[GenerateResponse] when stream=True
    """
def chat(
    model: str = '',
    messages: Sequence[Union[Mapping[str, Any], Message]] = None,
    *,
    tools: Sequence[Union[Mapping[str, Any], Tool, Callable]] = None,
    stream: bool = False,
    think: Union[bool, Literal['low', 'medium', 'high']] = None,
    format: Union[Literal['', 'json'], JsonSchemaValue] = None,
    options: Union[Mapping[str, Any], Options] = None,
    keep_alive: Union[float, str] = None
) -> Union[ChatResponse, Iterator[ChatResponse]]:
    """
    Chat with a model using conversation history.

    Parameters:
    - model (str): Model name to use for chat. Default: ''
    - messages (Sequence[Mapping | Message], optional): Conversation messages. Default: None
    - tools (Sequence[Mapping | Tool | Callable], optional): Available tools for function calling
    - stream (bool): Return streaming responses. Default: False
    - think (bool | 'low' | 'medium' | 'high', optional): Enable thinking mode for reasoning models
    - format ('', 'json', or JsonSchemaValue, optional): Response format constraint
    - options (Mapping | Options, optional): Model configuration options
    - keep_alive (float | str, optional): How long to keep the model loaded
    Returns:
    ChatResponse, or Iterator[ChatResponse] when stream=True
    """

# Generate vector embeddings from text inputs for semantic analysis and search.
def embed(
    model: str = '',
    input: Union[str, Sequence[str]] = '',
    truncate: bool | None = None,
    options: Options | None = None,
    keep_alive: str | None = None
) -> EmbedResponse:
    """
    Generate embeddings for input text(s).

    Parameters:
    - model (str): Embedding model name. Default: ''
    - input (str | Sequence[str]): Text or list of texts to embed. Default: ''
    - truncate (bool, optional): Truncate inputs that exceed model limits
    - options (Options, optional): Model configuration options
    - keep_alive (str, optional): How long to keep the model loaded
    Returns:
    EmbedResponse containing embedding vectors
    """
def embeddings(
    model: str,
    prompt: str,
    options: Options = None,
    keep_alive: str = None
) -> EmbeddingsResponse:
    """
    Generate an embedding for a single prompt.

    Deprecated: use embed() instead.

    Parameters:
    - model (str): Embedding model name
    - prompt (str): Text to embed
    - options (Options, optional): Model configuration options
    - keep_alive (str, optional): How long to keep the model loaded
    Returns:
    EmbeddingsResponse containing a single embedding vector
    """

# Download, upload, create, and manage Ollama models with progress tracking.
def pull(
    model: str,
    *,
    insecure: bool = False,
    stream: bool = False
) -> ProgressResponse | Iterator[ProgressResponse]:
    """
    Download a model from a model library.

    Parameters:
    - model (str): Model name to download
    - insecure (bool): Allow insecure connections to the library. Default: False
    - stream (bool): Return streaming progress updates. Default: False
    Returns:
    ProgressResponse, or Iterator[ProgressResponse] when stream=True
    """
def push(
    model: str,
    *,
    insecure: bool = False,
    stream: bool = False
) -> ProgressResponse | Iterator[ProgressResponse]:
    """
    Upload a model to a model library.

    Parameters:
    - model (str): Model name to upload
    - insecure (bool): Allow insecure connections to the library. Default: False
    - stream (bool): Return streaming progress updates. Default: False
    Returns:
    ProgressResponse, or Iterator[ProgressResponse] when stream=True
    """
def create(
    model: str,
    quantize: str | None = None,
    from_: str | None = None,
    files: dict | None = None,
    adapters: dict[str, str] | None = None,
    template: str | None = None,
    license: Union[str, list[str]] | None = None,
    system: str | None = None,
    parameters: dict | None = None,
    messages: list[Message] | None = None,
    *,
    stream: bool = False
) -> ProgressResponse | Iterator[ProgressResponse]:
    """
    Create a new model from a Modelfile.

    Parameters:
    - model (str): Name for the new model
    - quantize (str, optional): Quantization method
    - from_ (str, optional): Base model to inherit from
    - files (dict, optional): Additional files to include
    - adapters (dict[str, str], optional): Model adapters to apply
    - template (str, optional): Prompt template
    - license (str | list[str], optional): Model license(s)
    - system (str, optional): System message template
    - parameters (dict, optional): Model parameters
    - messages (list[Message], optional): Example messages
    - stream (bool): Return streaming progress updates. Default: False
    Returns:
    ProgressResponse, or Iterator[ProgressResponse] when stream=True
    """
def delete(
    model: str
) -> StatusResponse:
    """
    Delete a model.

    Parameters:
    - model (str): Name of the model to delete
    Returns:
    StatusResponse with deletion status
    """
def copy(
    source: str,
    destination: str
) -> StatusResponse:
    """
    Copy a model under a new name.

    Parameters:
    - source (str): Source model name
    - destination (str): Destination model name
    Returns:
    StatusResponse with copy status
    """

# Retrieve information about available and running models.
def list() -> ListResponse:
    """
    List available models.

    Note: the name mirrors the Ollama API and shadows the builtin list().

    Returns:
    ListResponse containing model information
    """
def show(
    model: str
) -> ShowResponse:
    """
    Show information about a specific model.

    Parameters:
    - model (str): Model name to show information for
    Returns:
    ShowResponse with detailed model information
    """
def ps() -> ProcessResponse:
    """
    List running models and their resource usage.

    Returns:
    ProcessResponse with the currently running models
    """

import ollama
# Simple text generation
response = ollama.generate(
    model='llama3.2',
    prompt='Explain quantum computing in simple terms'
)
print(response['response'])

# With options: higher temperature for more creative output
response = ollama.generate(
    model='llama3.2',
    prompt='Write a poem about autumn',
    options={'temperature': 0.8, 'top_p': 0.9}
)
print(response['response'])

import ollama
# Stream text generation: iterate chunks as they arrive
print("Generating story...")
for chunk in ollama.generate(
    model='llama3.2',
    prompt='Tell me a story about a brave knight',
    stream=True
):
    if chunk.get('response'):
        print(chunk['response'], end='', flush=True)
print()  # New line after streaming

import ollama
# Simple chat
messages = [
    {'role': 'user', 'content': 'Hello! What can you help me with?'}
]
response = ollama.chat(
    model='llama3.2',
    messages=messages
)
print(response['message']['content'])

# Multi-turn conversation: feed the assistant reply back into the history
messages.append(response['message'])
messages.append({
    'role': 'user',
    'content': 'Tell me about machine learning'
})
response = ollama.chat(
    model='llama3.2',
    messages=messages
)
print(response['message']['content'])

import ollama
from ollama import Tool

def get_current_time() -> str:
    """Get the current time."""
    from datetime import datetime
    return datetime.now().strftime("%Y-%m-%d %H:%M:%S")

def calculate_tip(bill_amount: float, tip_percentage: float = 15.0) -> float:
    """Calculate tip amount."""
    return bill_amount * (tip_percentage / 100)

# Create tools from functions
# NOTE(review): _utils is a private module; chat() also accepts plain callables
# directly in `tools` per its signature — confirm before relying on this import.
from ollama._utils import convert_function_to_tool
tools = [
    convert_function_to_tool(get_current_time),
    convert_function_to_tool(calculate_tip)
]

# Chat with function calling
response = ollama.chat(
    model='llama3.2',
    messages=[{
        'role': 'user',
        'content': 'What time is it and what would be a 20% tip on a $45 bill?'
    }],
    tools=tools
)
print(response['message']['content'])

import ollama
import numpy as np

# Generate embeddings for documents
documents = [
    "The cat sat on the mat",
    "A dog played in the park",
    "The feline rested on the rug",
    "Birds flew in the sky"
]

# Get embeddings
embeddings = []
for doc in documents:
    response = ollama.embed(
        model='nomic-embed-text',
        input=doc
    )
    embeddings.append(response['embeddings'][0])

# Query embedding
query = "A cat lying down"
query_response = ollama.embed(
    model='nomic-embed-text',
    input=query
)
query_embedding = query_response['embeddings'][0]

# Calculate cosine similarity between the query and each document
similarities = []
for emb in embeddings:
    similarity = np.dot(query_embedding, emb) / (
        np.linalg.norm(query_embedding) * np.linalg.norm(emb)
    )
    similarities.append(similarity)

# Find most similar document
most_similar_idx = np.argmax(similarities)
print(f"Most similar: {documents[most_similar_idx]}")
print(f"Similarity: {similarities[most_similar_idx]:.3f}")

import ollama
# List available models
models = ollama.list()
print("Available models:")
for model in models['models']:
    print(f"- {model['name']} ({model['size']})")

# Pull a new model, reporting download progress
print("Downloading model...")
for progress in ollama.pull('phi3', stream=True):
    if progress.get('completed') and progress.get('total'):
        percent = (progress['completed'] / progress['total']) * 100
        print(f"Progress: {percent:.1f}%", end='\r')
print("\nDownload complete!")

# Show model details
details = ollama.show('phi3')
print(f"Model: {details.get('details', {}).get('family', 'Unknown')}")
print(f"Parameters: {details.get('details', {}).get('parameter_size', 'Unknown')}")

# Check running models
running = ollama.ps()
if running['models']:
    print("Currently running models:")
    for model in running['models']:
        print(f"- {model['name']}")
else:
    print("No models currently running")

import ollama
from ollama import Image

# Analyze an image
response = ollama.generate(
    model='llava',  # or another vision-capable model
    prompt='Describe what you see in this image',
    images=[Image(value='path/to/image.jpg')]
)
print(response['response'])

# Compare multiple images in a single chat message
response = ollama.chat(
    model='llava',
    messages=[{
        'role': 'user',
        'content': 'What are the differences between these two images?',
        'images': [
            Image(value='image1.jpg'),
            Image(value='image2.jpg')
        ]
    }]
)
print(response['message']['content'])

# Install with Tessl CLI:
npx tessl i tessl/pypi-ollama