Kiln AI is a comprehensive platform for building, evaluating, and deploying AI systems with dataset management, model fine-tuning, RAG, and evaluation capabilities.
Comprehensive registry of supported AI models and embedding models with capability metadata, provider configurations, and model discovery functions.
Functions for discovering and retrieving model information.
from kiln_ai.adapters.ml_model_list import get_model_by_name, built_in_models_from_provider
def get_model_by_name(model_name: str) -> "KilnModel | None":
    """
    Retrieve a built-in model definition by its identifier.

    Parameters:
    - model_name (str): Model identifier (e.g., "gpt_4o", "claude_3_5_sonnet")

    Returns:
    KilnModel | None: Model definition, or None if no model matches the name
    """
def built_in_models_from_provider(provider_name: str) -> list:
    """
    List all built-in models registered for a provider.

    Parameters:
    - provider_name (str): Provider identifier (e.g., "openai", "anthropic", "groq")

    Returns:
    list[KilnModel]: Model definitions for the provider
    """
def default_structured_output_mode_for_model_provider(
    model_id: str,
    provider: str,
):
    """
    Get the default structured output mode for a model/provider pair.

    Parameters:
    - model_id (str): Model identifier
    - provider (str): Provider name

    Returns:
    StructuredOutputMode: Default output mode (none, json, or structured)
    """


# Core model definition classes with capability metadata.
class KilnModel:
    """
    Model definition with capability metadata.

    Properties:
    - name (str): Model name/identifier
    - family (ModelFamily): Model family (gpt, claude, llama, etc.)
    - provider (KilnModelProvider): Provider configuration
    - context_window (int): Maximum context window size in tokens
    - supports_vision (bool): Whether the model accepts image inputs
    - parser_id (ModelParserID): Parser applied to model output
    - formatter_id (ModelFormatterID): Formatter applied to model input
    - supports_streaming (bool): Whether the model supports streaming responses
    - supports_tools (bool): Whether the model supports tool/function calling
    - supports_structured_output (bool): Whether the model supports structured JSON output
    """
class KilnModelProvider:
    """
    Model provider configuration.

    Properties:
    - name (str): Provider name (e.g., "openai", "anthropic")
    - supports_streaming (bool): Provider supports streaming responses
    - supports_tools (bool): Provider supports tool/function calling
    - supports_structured_output (bool): Provider supports structured output
    """


# Enumeration of model families for categorization.
class ModelFamily:
    """
    Categories of model families.

    Values:
    - gpt: OpenAI GPT models
    - claude: Anthropic Claude models
    - llama: Meta Llama models
    - mistral: Mistral AI models
    - gemini: Google Gemini models
    - qwen: Alibaba Qwen models
    - deepseek: DeepSeek models
    - command: Cohere Command models
    - titan: AWS Titan models
    - phi: Microsoft Phi models
    - wizardlm: WizardLM models
    - yi: 01.AI Yi models
    - mixtral: Mixtral models
    """

    gpt = "gpt"
    claude = "claude"
    llama = "llama"
    mistral = "mistral"
    gemini = "gemini"
    qwen = "qwen"
    deepseek = "deepseek"
    command = "command"
    titan = "titan"
    phi = "phi"
    wizardlm = "wizardlm"
    yi = "yi"
    mixtral = "mixtral"


# Enumeration of supported model identifiers.
class ModelName:
    """
    Supported model identifiers, mapping registry keys to provider model ids.

    Common values:
    - gpt_4o: OpenAI GPT-4o
    - gpt_4o_mini: OpenAI GPT-4o Mini
    - gpt_4_turbo: OpenAI GPT-4 Turbo
    - gpt_3_5_turbo: OpenAI GPT-3.5 Turbo
    - claude_3_5_sonnet: Anthropic Claude 3.5 Sonnet
    - claude_3_opus: Anthropic Claude 3 Opus
    - claude_3_haiku: Anthropic Claude 3 Haiku
    - llama_3_1_405b: Meta Llama 3.1 405B
    - llama_3_1_70b: Meta Llama 3.1 70B
    - llama_3_1_8b: Meta Llama 3.1 8B
    - mistral_large: Mistral Large
    - mistral_nemo: Mistral Nemo
    - gemini_1_5_pro: Google Gemini 1.5 Pro
    - gemini_1_5_flash: Google Gemini 1.5 Flash
    - qwen_2_5_72b: Alibaba Qwen 2.5 72B
    - deepseek_chat: DeepSeek Chat
    """

    gpt_4o = "gpt-4o"
    gpt_4o_mini = "gpt-4o-mini"
    gpt_4_turbo = "gpt-4-turbo"
    gpt_3_5_turbo = "gpt-3.5-turbo"
    claude_3_5_sonnet = "claude-3-5-sonnet-20241022"
    claude_3_opus = "claude-3-opus-20240229"
    claude_3_haiku = "claude-3-haiku-20240307"
    llama_3_1_405b = "llama-3.1-405b-instruct"
    llama_3_1_70b = "llama-3.1-70b-instruct"
    llama_3_1_8b = "llama-3.1-8b-instruct"
    mistral_large = "mistral-large-latest"
    mistral_nemo = "mistral-nemo"
    gemini_1_5_pro = "gemini-1.5-pro"
    gemini_1_5_flash = "gemini-1.5-flash"
    qwen_2_5_72b = "qwen-2.5-72b-instruct"
    deepseek_chat = "deepseek-chat"


# Identifiers for model input/output processing.
class ModelParserID:
    """
    Identifiers for parsers applied to raw model output.

    Values:
    - default: Standard output parser
    - r1_thinking: Parser for R1-style reasoning outputs
    """

    # Ordinary string constants (not an enum.Enum) so they compare equal to
    # plain strings used elsewhere in the registry.
    default = "default"
    r1_thinking = "r1_thinking"
class ModelFormatterID:
    """
    Formatter identifiers for preparing model inputs.

    Values:
    - default: Standard input formatter
    - qwen3_no_think: Qwen3 formatter without thinking tags
    """

    default = "default"
    qwen3_no_think = "qwen3_no_think"


# Discovery and configuration for embedding models.
from kiln_ai.adapters.ml_embedding_model_list import (
get_model_by_name,
built_in_embedding_models_from_provider
)
def get_model_by_name(model_name: str) -> "KilnEmbeddingModel | None":
    """
    Retrieve an embedding model definition by name.

    Parameters:
    - model_name (str): Embedding model identifier

    Returns:
    KilnEmbeddingModel | None: Model definition, or None if not found
    """
def built_in_embedding_models_from_provider(provider_name: str) -> list:
    """
    List built-in embedding models for a provider.

    Parameters:
    - provider_name (str): Provider identifier

    Returns:
    list[KilnEmbeddingModel]: Embedding model definitions for the provider
    """
class KilnEmbeddingModel:
    """
    Embedding model definition with size limits.

    Properties:
    - name (str): Model name/identifier
    - family (KilnEmbeddingModelFamily): Model family
    - provider (KilnEmbeddingModelProvider): Provider configuration
    - dimensions (int): Embedding vector dimensions
    - max_input_tokens (int): Maximum input tokens per request
    """
class KilnEmbeddingModelProvider:
    """
    Embedding provider configuration.

    Properties:
    - name (str): Provider name
    - default_dimensions (int): Default embedding dimensions
    """


# Categories of embedding model families.
class KilnEmbeddingModelFamily:
    """
    Embedding model families.

    Values:
    - openai: OpenAI embedding models
    - cohere: Cohere embedding models
    - voyage: Voyage AI embedding models
    - sentence_transformers: Sentence Transformers models
    """

    openai = "openai"
    cohere = "cohere"
    voyage = "voyage"
    sentence_transformers = "sentence_transformers"


# Supported embedding model identifiers.
class EmbeddingModelName:
    """
    Supported embedding model identifiers.

    Values:
    - text_embedding_3_small: OpenAI text-embedding-3-small
    - text_embedding_3_large: OpenAI text-embedding-3-large
    - text_embedding_ada_002: OpenAI text-embedding-ada-002
    - embed_english_v3: Cohere embed-english-v3.0
    - embed_multilingual_v3: Cohere embed-multilingual-v3.0
    - voyage_large_2: Voyage AI voyage-large-2
    - voyage_code_2: Voyage AI voyage-code-2
    """

    text_embedding_3_small = "text-embedding-3-small"
    text_embedding_3_large = "text-embedding-3-large"
    text_embedding_ada_002 = "text-embedding-ada-002"
    embed_english_v3 = "embed-english-v3.0"
    embed_multilingual_v3 = "embed-multilingual-v3.0"
    voyage_large_2 = "voyage-large-2"
    voyage_code_2 = "voyage-code-2"


# Imports for the model-discovery example that follows.
from kiln_ai.adapters.ml_model_list import (
    get_model_by_name,
    built_in_models_from_provider,
)
# Example: look up one model and enumerate models per provider.

# Get specific model
model = get_model_by_name("gpt_4o")
if model:
    print(f"Model: {model.name}")
    print(f"Family: {model.family}")
    print(f"Context window: {model.context_window}")
    print(f"Supports vision: {model.supports_vision}")
    print(f"Supports streaming: {model.supports_streaming}")
    print(f"Supports tools: {model.supports_tools}")

# List all OpenAI models
openai_models = built_in_models_from_provider("openai")
for model in openai_models:
    print(f"- {model.name} (context: {model.context_window})")

# List all Anthropic models
anthropic_models = built_in_models_from_provider("anthropic")
for model in anthropic_models:
    print(f"- {model.name}")


# Import for the capability-report example that follows.
from kiln_ai.adapters.ml_model_list import get_model_by_name
def check_model_capabilities(model_name: str):
    """
    Print a capability report for the named model.

    Parameters:
    - model_name (str): Registry model identifier (e.g., "gpt_4o")

    Prints a not-found message and returns early when the name is unknown.
    """
    model = get_model_by_name(model_name)
    if not model:
        print(f"Model {model_name} not found")
        return
    print(f"Model: {model.name}")
    print(f"Provider: {model.provider.name}")
    print(f"Capabilities:")
    print(f" - Vision: {model.supports_vision}")
    print(f" - Streaming: {model.supports_streaming}")
    print(f" - Tools: {model.supports_tools}")
    print(f" - Structured Output: {model.supports_structured_output}")
    print(f" - Context Window: {model.context_window} tokens")


# Check various models
check_model_capabilities("gpt_4o")
check_model_capabilities("claude_3_5_sonnet")
check_model_capabilities("llama_3_1_8b")


# Imports for the embedding-model example that follows.
from kiln_ai.adapters.ml_embedding_model_list import (
    get_model_by_name,
    built_in_embedding_models_from_provider,
)
# Example: inspect and compare embedding models.

# Get specific embedding model
model = get_model_by_name("text_embedding_3_small")
if model:
    print(f"Model: {model.name}")
    print(f"Dimensions: {model.dimensions}")
    print(f"Max tokens: {model.max_input_tokens}")

# List all OpenAI embedding models
openai_embeddings = built_in_embedding_models_from_provider("openai")
for model in openai_embeddings:
    print(f"- {model.name}: {model.dimensions}D")

# Compare embedding models
models_to_compare = [
    "text_embedding_3_small",
    "text_embedding_3_large",
    "embed_english_v3",
]
for model_name in models_to_compare:
    model = get_model_by_name(model_name)
    if model:
        print(f"{model.name}:")
        print(f" Provider: {model.provider.name}")
        print(f" Dimensions: {model.dimensions}")
        print(f" Max input: {model.max_input_tokens} tokens")


# Import for the capability-filter example that follows.
from kiln_ai.adapters.ml_model_list import built_in_models_from_provider
# Example: filter the model registry by capability.

# Find all models that support vision
def find_vision_models(provider: str):
    """Return the provider's built-in models that accept image input."""
    models = built_in_models_from_provider(provider)
    vision_models = [m for m in models if m.supports_vision]
    return vision_models


openai_vision = find_vision_models("openai")
print("OpenAI models with vision support:")
for model in openai_vision:
    print(f" - {model.name}")


# Find models with large context windows
def find_large_context_models(min_tokens: int):
    """Return models (openai/anthropic/google) whose context window is >= min_tokens."""
    providers = ["openai", "anthropic", "google"]
    large_context = []
    for provider in providers:
        models = built_in_models_from_provider(provider)
        for model in models:
            if model.context_window >= min_tokens:
                large_context.append(model)
    return large_context


large_models = find_large_context_models(100000)
print(f"\nModels with 100K+ context:")
for model in large_models:
    print(f" - {model.name}: {model.context_window:,} tokens")


# Import for the adapter example that follows.
from kiln_ai.adapters import adapter_for_task
from kiln_ai.adapters.ml_model_list import get_model_by_name, ModelName
from kiln_ai.datamodel import Task
# Example: create a task adapter from a model identifier.

# Use model name enum for type safety
task = Task(
    name="test_task",
    instruction="Test instruction",
)

# Create adapter using model name
adapter = adapter_for_task(
    task,
    model_name=ModelName.gpt_4o,
    provider="openai",
)

# Or verify model exists before creating adapter
model_to_use = "claude_3_5_sonnet"
model_info = get_model_by_name(model_to_use)
if model_info:
    print(f"Using {model_info.name} with {model_info.context_window} context")
    adapter = adapter_for_task(task, model_name=model_to_use, provider="anthropic")
else:
    print(f"Model {model_to_use} not found")


# Import for the custom-model config example that follows.
from kiln_ai.utils.config import Config
# Example: register a custom model id in the shared config.

# Add custom model to existing provider
new_model = "openai::gpt-3.5-turbo-custom"
custom_models = Config.shared().custom_models or []
if new_model not in custom_models:
    # Only persist when the id is actually new.
    custom_models.append(new_model)
    Config.shared().custom_models = custom_models
    Config.shared().save()
    print(f"Added custom model: {new_model}")

# List all custom models
print("Custom models:")
for model in Config.shared().custom_models:
    print(f" - {model}")


# Imports for the structured-output example that follows.
from kiln_ai.adapters.ml_model_list import (
    get_model_by_name,
    default_structured_output_mode_for_model_provider,
)
from kiln_ai.datamodel import StructuredOutputMode
# Example: determine the default structured output mode for a model.

# Check structured output support
model_name = "gpt_4o"
provider = "openai"
model = get_model_by_name(model_name)
if model and model.supports_structured_output:
    mode = default_structured_output_mode_for_model_provider(model_name, provider)
    print(f"{model_name} supports structured output")
    print(f"Default mode: {mode}")
    if mode == StructuredOutputMode.structured:
        print("Full schema validation available")
    elif mode == StructuredOutputMode.json:
        print("JSON mode available")


# Install with Tessl CLI:
npx tessl i tessl/pypi-kiln-ai