haystack-ai: an LLM framework for building customizable, production-ready LLM applications.

Large language model integrations for text generation, chat completions, and answer synthesis. Supports multiple providers, including OpenAI, Azure OpenAI, and HuggingFace models.
Generate text using OpenAI's GPT models for completion tasks and answer synthesis.
class OpenAIGenerator:
    # Text generator backed by OpenAI's chat completion API; produces plain-text
    # replies for completion-style prompts and answer synthesis.
    def __init__(
        self,
        api_key: Secret = Secret.from_env_var("OPENAI_API_KEY"),
        model: str = "gpt-4o-mini",
        streaming_callback: Optional[StreamingCallbackT] = None,
        api_base_url: Optional[str] = None,
        organization: Optional[str] = None,
        system_prompt: Optional[str] = None,
        generation_kwargs: Optional[Dict[str, Any]] = None,
        timeout: Optional[float] = None,
        max_retries: Optional[int] = None,
        http_client_kwargs: Optional[Dict[str, Any]] = None
    ) -> None:
        """
        Initialize an OpenAI text generator that uses the chat completion API.

        Args:
            api_key: OpenAI API key (defaults to the OPENAI_API_KEY env var).
            model: OpenAI model name (defaults to "gpt-4o-mini").
            streaming_callback: Callback invoked with each streamed chunk.
            api_base_url: Custom API base URL (e.g. for proxies).
            organization: OpenAI organization ID.
            system_prompt: Default system prompt prepended to every run.
            generation_kwargs: Additional generation parameters passed to the API.
            timeout: Request timeout in seconds.
            max_retries: Maximum number of retries for failed requests.
            http_client_kwargs: HTTP client configuration.
        """

    def run(
        self,
        prompt: str,
        system_prompt: Optional[str] = None,
        streaming_callback: Optional[StreamingCallbackT] = None,
        generation_kwargs: Optional[Dict[str, Any]] = None
    ) -> Dict[str, List[str]]:
        """
        Generate text for a single prompt via the OpenAI chat completion API.

        Args:
            prompt: Text prompt for generation.
            system_prompt: System prompt; overrides the default from __init__ if provided.
            streaming_callback: Callback for streaming responses; overrides the default.
            generation_kwargs: Additional generation parameters merged over the defaults.

        Returns:
            Dictionary with a 'replies' key containing the list of generated texts
            and a 'meta' key containing response metadata.
        """
class AzureOpenAIGenerator:
    """Generate text completions through an Azure OpenAI model deployment."""

    def __init__(
        self,
        azure_endpoint: str,
        api_version: str,
        api_key: Optional[Secret] = None,
        azure_ad_token: Optional[Secret] = None,
        model: str = "gpt-35-turbo-instruct",
        max_tokens: Optional[int] = None,
        temperature: float = 1.0,
        top_p: float = 1.0,
        n: int = 1,
        stop: Optional[List[str]] = None,
        presence_penalty: float = 0.0,
        frequency_penalty: float = 0.0
    ) -> None:
        """
        Initialize an Azure OpenAI text generator.

        Args:
            azure_endpoint: Azure OpenAI endpoint URL.
            api_version: Azure OpenAI API version.
            api_key: Azure OpenAI API key (key-based authentication).
            azure_ad_token: Azure AD token (token-based authentication).
            model: Deployment name of the model.
            max_tokens: Maximum tokens to generate.
            temperature: Sampling temperature.
            top_p: Nucleus sampling parameter.
            n: Number of completions to generate per prompt.
            stop: Stop sequences that terminate generation.
            presence_penalty: Penalty applied to tokens already present.
            frequency_penalty: Penalty applied proportionally to token frequency.
        """

    def run(
        self,
        prompt: str,
        generation_kwargs: Optional[Dict[str, Any]] = None
    ) -> Dict[str, List[str]]:
        """
        Generate text using Azure OpenAI.

        Args:
            prompt: Text prompt for generation.
            generation_kwargs: Additional generation parameters overriding the
                defaults set in __init__.

        Returns:
            Dictionary with a 'replies' key containing the generated texts.
        """


# Generate conversational responses using OpenAI's chat models with support
# for function calling and streaming.
class OpenAIChatGenerator:
    # Chat completion generator for OpenAI chat models; supports tool/function
    # calling and streaming via the optional callback.
    def __init__(
        self,
        api_key: Optional[Secret] = None,
        model: str = "gpt-3.5-turbo",
        max_tokens: Optional[int] = None,
        temperature: float = 1.0,
        top_p: float = 1.0,
        n: int = 1,
        stop: Optional[List[str]] = None,
        presence_penalty: float = 0.0,
        frequency_penalty: float = 0.0,
        tools: Optional[List[Tool]] = None,
        tool_choice: Optional[Union[str, Dict[str, str]]] = None,
        streaming_callback: Optional[Callable[[StreamingChunk], None]] = None
    ) -> None:
        """
        Initialize an OpenAI chat generator.

        Args:
            api_key: OpenAI API key.
            model: OpenAI model name.
            max_tokens: Maximum tokens to generate.
            temperature: Sampling temperature.
            top_p: Nucleus sampling parameter.
            n: Number of chat completion choices per request.
            stop: Stop sequences that terminate generation.
            presence_penalty: Penalty applied to tokens already present.
            frequency_penalty: Penalty applied proportionally to token frequency.
            tools: Tools the model may call (function calling).
            tool_choice: How the model should choose among the tools.
            streaming_callback: Callback invoked with each streamed chunk.
        """

    def run(
        self,
        messages: List[ChatMessage],
        generation_kwargs: Optional[Dict[str, Any]] = None
    ) -> Dict[str, List[ChatMessage]]:
        """
        Generate chat completions for the given conversation.

        Args:
            messages: List of chat messages forming the conversation so far.
            generation_kwargs: Additional generation parameters merged over the defaults.

        Returns:
            Dictionary with a 'replies' key containing the ChatMessage responses.
        """
class AzureOpenAIChatGenerator:
    """Chat completion generator backed by an Azure OpenAI model deployment."""

    def __init__(
        self,
        azure_endpoint: str,
        api_version: str,
        api_key: Optional[Secret] = None,
        azure_ad_token: Optional[Secret] = None,
        model: str = "gpt-35-turbo",
        max_tokens: Optional[int] = None,
        temperature: float = 1.0,
        top_p: float = 1.0,
        n: int = 1,
        stop: Optional[List[str]] = None,
        presence_penalty: float = 0.0,
        frequency_penalty: float = 0.0,
        tools: Optional[List[Tool]] = None,
        tool_choice: Optional[Union[str, Dict[str, str]]] = None,
        streaming_callback: Optional[Callable[[StreamingChunk], None]] = None
    ) -> None:
        """
        Initialize an Azure OpenAI chat generator.

        Args:
            azure_endpoint: Azure OpenAI endpoint URL.
            api_version: Azure OpenAI API version.
            api_key: Azure OpenAI API key (key-based authentication).
            azure_ad_token: Azure AD token (token-based authentication).
            model: Deployment name of the chat model.
            max_tokens: Maximum tokens to generate.
            temperature: Sampling temperature.
            top_p: Nucleus sampling parameter.
            n: Number of chat completion choices per request.
            stop: Stop sequences that terminate generation.
            presence_penalty: Penalty applied to tokens already present.
            frequency_penalty: Penalty applied proportionally to token frequency.
            tools: Tools the model may call (function calling).
            tool_choice: How the model should choose among the tools.
            streaming_callback: Callback invoked with each streamed chunk.
        """

    def run(
        self,
        messages: List[ChatMessage],
        generation_kwargs: Optional[Dict[str, Any]] = None
    ) -> Dict[str, List[ChatMessage]]:
        """
        Generate chat completions using Azure OpenAI.

        Args:
            messages: List of chat messages forming the conversation so far.
            generation_kwargs: Additional generation parameters merged over the defaults.

        Returns:
            Dictionary with a 'replies' key containing the ChatMessage responses.
        """


# Generate text using HuggingFace transformer models, both locally and via API.
class HuggingFaceLocalGenerator:
    # Text generator that runs a HuggingFace transformers pipeline locally.
    def __init__(
        self,
        model: str = "gpt2",
        task: Optional[str] = None,
        device: Optional[ComponentDevice] = None,
        token: Optional[Secret] = None,
        generation_kwargs: Optional[Dict[str, Any]] = None,
        huggingface_pipeline_kwargs: Optional[Dict[str, Any]] = None,
        stop_words: Optional[List[str]] = None,
        streaming_callback: Optional[Callable[[StreamingChunk], None]] = None
    ) -> None:
        """
        Initialize a local HuggingFace generator.

        Args:
            model: HuggingFace model name or local path.
            task: Task type ("text-generation" or "text2text-generation").
            device: Device to run inference on.
            token: HuggingFace token for gated/private models.
            generation_kwargs: Generation parameters passed to the pipeline.
            huggingface_pipeline_kwargs: Pipeline initialization parameters.
            stop_words: Words that terminate generation when produced.
            streaming_callback: Callback invoked with each streamed chunk.
        """

    def run(
        self,
        prompt: str,
        generation_kwargs: Optional[Dict[str, Any]] = None
    ) -> Dict[str, List[str]]:
        """
        Generate text with the local HuggingFace model.

        Args:
            prompt: Input prompt for generation.
            generation_kwargs: Additional generation parameters merged over the defaults.

        Returns:
            Dictionary with a 'replies' key containing the generated texts.
        """
class HuggingFaceAPIGenerator:
    # Text generator that calls HuggingFace's hosted inference APIs
    # (serverless Inference API or dedicated Inference Endpoints).
    def __init__(
        self,
        api_type: Literal["serverless_inference_api", "inference_endpoints"] = "serverless_inference_api",
        api_url: Optional[str] = None,
        token: Optional[Secret] = None,
        model: Optional[str] = None,
        max_tokens: Optional[int] = None,
        top_k: Optional[int] = None,
        top_p: Optional[float] = None,
        temperature: Optional[float] = None,
        repetition_penalty: Optional[float] = None,
        stop_sequences: Optional[List[str]] = None,
        streaming_callback: Optional[Callable[[StreamingChunk], None]] = None
    ) -> None:
        """
        Initialize a HuggingFace API generator.

        Args:
            api_type: Which HuggingFace API to use.
            api_url: Custom API endpoint URL (for inference endpoints).
            token: HuggingFace API token.
            model: Model name (for serverless inference).
            max_tokens: Maximum tokens to generate.
            top_k: Top-k sampling parameter.
            top_p: Top-p (nucleus) sampling parameter.
            temperature: Sampling temperature.
            repetition_penalty: Penalty applied to repeated tokens.
            stop_sequences: Sequences that terminate generation.
            streaming_callback: Callback invoked with each streamed chunk.
        """

    def run(
        self,
        prompt: str,
        generation_kwargs: Optional[Dict[str, Any]] = None
    ) -> Dict[str, List[str]]:
        """
        Generate text through the HuggingFace API.

        Args:
            prompt: Input prompt for generation.
            generation_kwargs: Additional generation parameters merged over the defaults.

        Returns:
            Dictionary with a 'replies' key containing the generated texts.
        """
class HuggingFaceLocalChatGenerator:
    # Chat generator that runs a HuggingFace transformers pipeline locally.
    def __init__(
        self,
        model: str = "microsoft/DialoGPT-medium",
        device: Optional[ComponentDevice] = None,
        token: Optional[Secret] = None,
        generation_kwargs: Optional[Dict[str, Any]] = None,
        huggingface_pipeline_kwargs: Optional[Dict[str, Any]] = None,
        stop_words: Optional[List[str]] = None,
        streaming_callback: Optional[Callable[[StreamingChunk], None]] = None
    ) -> None:
        """
        Initialize a local HuggingFace chat generator.

        Args:
            model: HuggingFace model name or local path.
            device: Device to run inference on.
            token: HuggingFace token for gated/private models.
            generation_kwargs: Generation parameters passed to the pipeline.
            huggingface_pipeline_kwargs: Pipeline initialization parameters.
            stop_words: Words that terminate generation when produced.
            streaming_callback: Callback invoked with each streamed chunk.
        """

    def run(
        self,
        messages: List[ChatMessage],
        generation_kwargs: Optional[Dict[str, Any]] = None
    ) -> Dict[str, List[ChatMessage]]:
        """
        Generate chat responses with the local HuggingFace model.

        Args:
            messages: List of chat messages forming the conversation so far.
            generation_kwargs: Additional generation parameters merged over the defaults.

        Returns:
            Dictionary with a 'replies' key containing the ChatMessage responses.
        """
class HuggingFaceAPIChatGenerator:
    """Chat generator that calls HuggingFace's hosted inference APIs."""

    def __init__(
        self,
        api_type: Literal["serverless_inference_api", "inference_endpoints"] = "serverless_inference_api",
        api_url: Optional[str] = None,
        token: Optional[Secret] = None,
        model: Optional[str] = None,
        max_tokens: Optional[int] = None,
        top_k: Optional[int] = None,
        top_p: Optional[float] = None,
        temperature: Optional[float] = None,
        repetition_penalty: Optional[float] = None,
        stop_sequences: Optional[List[str]] = None,
        streaming_callback: Optional[Callable[[StreamingChunk], None]] = None
    ) -> None:
        """
        Initialize a HuggingFace API chat generator.

        Args:
            api_type: Which HuggingFace API to use.
            api_url: Custom API endpoint URL (for inference endpoints).
            token: HuggingFace API token.
            model: Model name (for serverless inference).
            max_tokens: Maximum tokens to generate.
            top_k: Top-k sampling parameter.
            top_p: Top-p (nucleus) sampling parameter.
            temperature: Sampling temperature.
            repetition_penalty: Penalty applied to repeated tokens.
            stop_sequences: Sequences that terminate generation.
            streaming_callback: Callback invoked with each streamed chunk.
        """

    def run(
        self,
        messages: List[ChatMessage],
        generation_kwargs: Optional[Dict[str, Any]] = None
    ) -> Dict[str, List[ChatMessage]]:
        """
        Generate chat responses through the HuggingFace API.

        Args:
            messages: List of chat messages forming the conversation so far.
            generation_kwargs: Additional generation parameters merged over the defaults.

        Returns:
            Dictionary with a 'replies' key containing the ChatMessage responses.
        """


# Generate images using DALL-E models.
class DALLEImageGenerator:
    """Generate images from text prompts using OpenAI's DALL-E models."""

    def __init__(
        self,
        api_key: Optional[Secret] = None,
        model: str = "dall-e-2",
        size: Literal["256x256", "512x512", "1024x1024", "1792x1024", "1024x1792"] = "1024x1024",
        quality: Literal["standard", "hd"] = "standard",
        n: int = 1,
        response_format: Literal["url", "b64_json"] = "url",
        style: Optional[Literal["vivid", "natural"]] = None
    ) -> None:
        """
        Initialize a DALL-E image generator.

        Args:
            api_key: OpenAI API key.
            model: DALL-E model name.
            size: Output image size.
            quality: Image quality.
            n: Number of images to generate per prompt.
            response_format: Whether the API returns URLs or base64-encoded images.
            style: Image style (DALL-E 3 only).
        """

    def run(
        self,
        prompt: str,
        generation_kwargs: Optional[Dict[str, Any]] = None
    ) -> Dict[str, List[ByteStream]]:
        """
        Generate images from a text prompt.

        Args:
            prompt: Text description of the desired image.
            generation_kwargs: Additional generation parameters merged over the defaults.

        Returns:
            Dictionary with an 'images' key containing the generated images.
        """


# --- Usage example: basic text generation ---
from haystack.components.generators import OpenAIGenerator
from haystack.utils import Secret

# Initialize the generator. Generation parameters such as max_tokens go through
# generation_kwargs: OpenAIGenerator.__init__ has no max_tokens parameter.
# NOTE(review): "gpt-3.5-turbo-instruct" is a completions-only model, while this
# generator uses the chat completion API — confirm the model name is valid here.
generator = OpenAIGenerator(
    api_key=Secret.from_env_var("OPENAI_API_KEY"),
    model="gpt-3.5-turbo-instruct",
    generation_kwargs={"max_tokens": 100},
)

# Generate text and print the first reply.
result = generator.run(prompt="Explain quantum computing in simple terms.")
print(result["replies"][0])

# --- Usage example: chat with function calling ---
from haystack.components.generators.chat import OpenAIChatGenerator
from haystack.dataclasses import ChatMessage, ChatRole
from haystack.tools import Tool


# Define a plain function to expose as a tool.
def get_weather(location: str) -> str:
    """Get weather for a location."""
    return f"Weather in {location}: Sunny, 22°C"


weather_tool = Tool.from_function(get_weather)

# Initialize the chat generator with the tool available for function calling.
chat_generator = OpenAIChatGenerator(
    api_key=Secret.from_env_var("OPENAI_API_KEY"),
    model="gpt-3.5-turbo",
    tools=[weather_tool],
)

# Build the conversation.
messages = [
    ChatMessage(content="What's the weather like in Paris?", role=ChatRole.USER)
]

# Generate a response.
result = chat_generator.run(messages=messages)
response_message = result["replies"][0]
print(f"Assistant: {response_message.content}")

# If the model requested a tool call, execute it locally.
if response_message.tool_calls:
    for tool_call in response_message.tool_calls:
        if tool_call.tool_name == "get_weather":
            weather_result = get_weather(**tool_call.arguments)
            print(f"Weather result: {weather_result}")

# --- Usage example: streaming chat responses ---
from haystack.components.generators.chat import OpenAIChatGenerator
from haystack.dataclasses import ChatMessage, ChatRole, StreamingChunk


def streaming_callback(chunk: StreamingChunk) -> None:
    """Handle streaming chunks."""
    # Print each chunk as it arrives, without buffering.
    if chunk.content:
        print(chunk.content, end="", flush=True)


# Initialize the generator with streaming enabled via the callback.
chat_generator = OpenAIChatGenerator(
    api_key=Secret.from_env_var("OPENAI_API_KEY"),
    model="gpt-3.5-turbo",
    streaming_callback=streaming_callback,
)

messages = [
    ChatMessage(content="Write a short story about AI.", role=ChatRole.USER)
]
result = chat_generator.run(messages=messages)

# --- Shared type imports and enums ---
from typing import Literal, Optional, Dict, Any, List, Callable, Union
from enum import Enum  # Enum base was referenced below but never imported

from haystack.dataclasses import ChatMessage, StreamingChunk, ByteStream
from haystack.tools import Tool
from haystack.utils import Secret, ComponentDevice


class FinishReason(Enum):
    """Reason reported by a provider for why generation stopped."""

    STOP = "stop"                       # model produced a natural stop
    LENGTH = "length"                   # token limit reached
    TOOL_CALLS = "tool_calls"           # model requested one or more tool calls
    CONTENT_FILTER = "content_filter"   # output withheld by the provider's filter


# Install with Tessl CLI:
npx tessl i tessl/pypi-haystack-ai