CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-litellm

Library to easily interface with LLM API providers

Pending
Overview
Eval results
Files

docs/other-apis.md

Embeddings & Other APIs

Specialized API endpoints for embedding generation, image creation, audio processing, moderation, and other non-completion services. These functions provide unified interfaces for diverse AI capabilities across multiple providers.

Capabilities

Embedding Generation

Generate vector embeddings from text inputs using various embedding models across different providers.

def embedding(
    model: str,
    input: Union[str, List[str], List[int], List[List[int]]],
    # Optional parameters
    encoding_format: Optional[Literal["float", "base64"]] = None,
    dimensions: Optional[int] = None,
    user: Optional[str] = None,
    # LiteLLM specific
    timeout: Optional[float] = None,
    api_key: Optional[str] = None,
    api_base: Optional[str] = None,
    api_version: Optional[str] = None,
    custom_llm_provider: Optional[str] = None,
    **kwargs
) -> EmbeddingResponse:
    """
    Generate embeddings for input text using specified embedding model.

    Args:
        model (str): Embedding model identifier (e.g., "text-embedding-ada-002", "embed-english-v3.0")
        input (Union[str, List[str], List[int], List[List[int]]]): Text or tokens to embed
        encoding_format (Optional[str]): Format for embedding values ("float" or "base64")
        dimensions (Optional[int]): Number of dimensions for embedding (if supported)
        user (Optional[str]): User identifier for tracking
        timeout (Optional[float]): Request timeout in seconds
        api_key (Optional[str]): Provider API key override

    Returns:
        EmbeddingResponse: Embedding vectors with usage information

    Raises:
        AuthenticationError: Invalid API key
        InvalidRequestError: Invalid model or parameters
        RateLimitError: Rate limit exceeded
    """

async def aembedding(
    model: str,
    input: Union[str, List[str], List[int], List[List[int]]],
    **kwargs
) -> EmbeddingResponse:
    """
    Async version of embedding generation.

    Args:
        Same as embedding() function

    Returns:
        EmbeddingResponse: Async embedding response
    """

Image Generation

Create images from text prompts using various image generation models.

def image_generation(
    prompt: str,
    model: Optional[str] = None,
    # Generation parameters
    n: Optional[int] = None,
    quality: Optional[Literal["standard", "hd"]] = None,
    response_format: Optional[Literal["url", "b64_json"]] = None,
    size: Optional[Literal["256x256", "512x512", "1024x1024", "1792x1024", "1024x1792"]] = None,
    style: Optional[Literal["vivid", "natural"]] = None,
    user: Optional[str] = None,
    # LiteLLM specific
    timeout: Optional[float] = None,
    api_key: Optional[str] = None,
    custom_llm_provider: Optional[str] = None,
    **kwargs
) -> ImageResponse:
    """
    Generate images from text prompts.

    Args:
        prompt (str): Text description of desired image
        model (Optional[str]): Image generation model (e.g., "dall-e-3", "dall-e-2")
        n (Optional[int]): Number of images to generate
        quality (Optional[str]): Image quality level ("standard" or "hd")
        size (Optional[str]): Image dimensions
        style (Optional[str]): Image style ("vivid" or "natural")
        response_format (Optional[str]): Return format ("url" or "b64_json")

    Returns:
        ImageResponse: Generated image URLs or base64 data

    Raises:
        ContentPolicyViolationError: Prompt violates content policy
        InvalidRequestError: Invalid parameters or model
    """

async def aimage_generation(
    prompt: str,
    **kwargs
) -> ImageResponse:
    """
    Async image generation.

    Args:
        Same as image_generation() function

    Returns:
        ImageResponse: Async image generation response
    """

Audio Transcription

Convert audio files to text using speech-to-text models.

def transcription(
    model: str,
    file: Union[str, bytes, IO],
    # Transcription parameters
    language: Optional[str] = None,
    prompt: Optional[str] = None,
    response_format: Optional[Literal["json", "text", "srt", "verbose_json", "vtt"]] = None,
    temperature: Optional[float] = None,
    timestamp_granularities: Optional[List[Literal["word", "segment"]]] = None,
    # LiteLLM specific
    timeout: Optional[float] = None,
    api_key: Optional[str] = None,
    custom_llm_provider: Optional[str] = None,
    **kwargs
) -> TranscriptionResponse:
    """
    Transcribe audio to text.

    Args:
        model (str): Transcription model (e.g., "whisper-1")
        file (Union[str, bytes, IO]): Audio file path, bytes, or file object
        language (Optional[str]): Audio language code (ISO-639-1)
        prompt (Optional[str]): Optional text to guide transcription
        response_format (Optional[str]): Output format ("json", "text", "srt", etc.)
        temperature (Optional[float]): Sampling temperature (0.0 to 1.0)
        timestamp_granularities (Optional[List[str]]): Timestamp detail level

    Returns:
        TranscriptionResponse: Transcribed text with metadata

    Raises:
        InvalidRequestError: Unsupported file format or invalid parameters
        APIError: Transcription service error
    """

async def atranscription(
    model: str,
    file: Union[str, bytes, IO],
    **kwargs
) -> TranscriptionResponse:
    """
    Async audio transcription.

    Args:
        Same as transcription() function

    Returns:
        TranscriptionResponse: Async transcription response
    """

Audio Speech Synthesis

Generate speech audio from text using text-to-speech models.

def speech(
    model: str,
    input: str,
    voice: str,
    # Speech parameters
    response_format: Optional[Literal["mp3", "opus", "aac", "flac", "wav", "pcm"]] = None,
    speed: Optional[float] = None,
    # LiteLLM specific
    timeout: Optional[float] = None,
    api_key: Optional[str] = None,
    custom_llm_provider: Optional[str] = None,
    **kwargs
) -> bytes:
    """
    Generate speech audio from text.

    Args:
        model (str): Speech synthesis model (e.g., "tts-1", "tts-1-hd")
        input (str): Text to convert to speech
        voice (str): Voice identifier ("alloy", "echo", "fable", "onyx", "nova", "shimmer")
        response_format (Optional[str]): Audio format ("mp3", "opus", "aac", etc.)
        speed (Optional[float]): Playback speed (0.25 to 4.0)

    Returns:
        bytes: Generated audio data

    Raises:
        InvalidRequestError: Invalid voice or parameters
        APIError: Speech synthesis error
    """

async def aspeech(
    model: str,
    input: str,
    voice: str,
    **kwargs
) -> bytes:
    """
    Async speech synthesis.

    Args:
        Same as speech() function

    Returns:
        bytes: Async speech audio data
    """

Content Moderation

Detect potentially harmful or inappropriate content in text.

def moderation(
    input: Union[str, List[str]],
    model: Optional[str] = None,
    # LiteLLM specific
    timeout: Optional[float] = None,
    api_key: Optional[str] = None,
    custom_llm_provider: Optional[str] = None,
    **kwargs
) -> ModerationCreateResponse:
    """
    Check content for policy violations.

    Args:
        input (Union[str, List[str]]): Text or list of texts to moderate
        model (Optional[str]): Moderation model (e.g., "text-moderation-latest")
        timeout (Optional[float]): Request timeout in seconds

    Returns:
        ModerationCreateResponse: Moderation results with category flags

    Raises:
        InvalidRequestError: Invalid input format
        APIError: Moderation service error
    """

async def amoderation(
    input: Union[str, List[str]],
    **kwargs
) -> ModerationCreateResponse:
    """
    Async content moderation.

    Args:
        Same as moderation() function

    Returns:
        ModerationCreateResponse: Async moderation response
    """

Reranking

Reorder documents by relevance to a query using reranking models.

def rerank(
    model: str,
    query: str,
    documents: List[Union[str, Dict[str, Any]]],
    top_n: Optional[int] = None,
    return_documents: Optional[bool] = None,
    # LiteLLM specific
    timeout: Optional[float] = None,
    api_key: Optional[str] = None,
    custom_llm_provider: Optional[str] = None,
    **kwargs
) -> RerankResponse:
    """
    Rerank documents by relevance to query.

    Args:
        model (str): Reranking model (e.g., "rerank-english-v3.0")
        query (str): Search query
        documents (List[Union[str, Dict]]): Documents to rank
        top_n (Optional[int]): Number of top results to return
        return_documents (Optional[bool]): Include document content in response

    Returns:
        RerankResponse: Ranked documents with relevance scores

    Raises:
        InvalidRequestError: Invalid documents or parameters
    """

async def arerank(
    model: str,
    query: str,
    documents: List[Union[str, Dict[str, Any]]],
    **kwargs
) -> RerankResponse:
    """
    Async document reranking.

    Args:
        Same as rerank() function

    Returns:
        RerankResponse: Async reranking response
    """

Response Objects

class EmbeddingResponse(OpenAIObject):
    """Embedding generation response"""
    object: str = "list"
    data: List[EmbeddingData]
    model: Optional[str]
    usage: Optional[Usage]
    _hidden_params: HiddenParams = {}

class EmbeddingData:
    """Individual embedding vector"""
    object: str = "embedding"
    index: int
    embedding: List[float]

class ImageResponse(OpenAIObject):
    """Image generation response"""
    created: int
    data: List[ImageObject]
    usage: Optional[Usage] = None
    _hidden_params: HiddenParams = {}

class ImageObject:
    """Individual generated image"""
    b64_json: Optional[str] = None
    url: Optional[str] = None
    revised_prompt: Optional[str] = None

class TranscriptionResponse(OpenAIObject):
    """Audio transcription response"""
    text: str
    task: Optional[str] = None
    language: Optional[str] = None
    duration: Optional[float] = None
    segments: Optional[List[TranscriptionSegment]] = None
    words: Optional[List[TranscriptionWord]] = None
    _hidden_params: HiddenParams = {}

class TranscriptionSegment:
    """Transcription segment with timestamp"""
    id: int
    seek: int
    start: float
    end: float
    text: str
    tokens: List[int]
    temperature: float
    avg_logprob: float
    compression_ratio: float
    no_speech_prob: float

class TranscriptionWord:
    """Individual word with timestamp"""
    word: str
    start: float
    end: float

class ModerationCreateResponse(OpenAIObject):
    """Content moderation response"""
    id: str
    model: str
    results: List[ModerationObject]

class ModerationObject:
    """Individual moderation result"""
    flagged: bool
    categories: ModerationCategories
    category_scores: ModerationCategoryScores

class ModerationCategories:
    """Moderation category flags"""
    hate: bool
    hate_threatening: bool
    harassment: bool
    harassment_threatening: bool
    self_harm: bool
    self_harm_intent: bool
    self_harm_instructions: bool
    sexual: bool
    sexual_minors: bool
    violence: bool
    violence_graphic: bool

class ModerationCategoryScores:
    """Moderation confidence scores"""
    hate: float
    hate_threatening: float
    harassment: float
    harassment_threatening: float
    self_harm: float
    self_harm_intent: float
    self_harm_instructions: float
    sexual: float
    sexual_minors: float
    violence: float
    violence_graphic: float

class RerankResponse(OpenAIObject):
    """Document reranking response"""
    id: Optional[str] = None
    results: List[RerankResult]
    meta: Optional[Dict[str, Any]] = None
    usage: Optional[Usage] = None

class RerankResult:
    """Individual reranked document"""
    index: int
    relevance_score: float
    document: Optional[Dict[str, Any]] = None

Usage Examples

Embedding Generation

import litellm

# Single text embedding
response = litellm.embedding(
    model="text-embedding-ada-002",
    input="Hello, world!"
)

embedding_vector = response.data[0].embedding
print(f"Embedding dimensions: {len(embedding_vector)}")

# Batch embedding generation
texts = [
    "First document text",
    "Second document text", 
    "Third document text"
]

response = litellm.embedding(
    model="text-embedding-ada-002",
    input=texts
)

for i, data in enumerate(response.data):
    print(f"Document {i}: {len(data.embedding)} dimensions")

# Different embedding providers
cohere_response = litellm.embedding(
    model="embed-english-v3.0",
    input="Text for Cohere embedding"
)

voyage_response = litellm.embedding(
    model="voyage-large-2",
    input="Text for Voyage AI embedding"
)

Image Generation

# Basic image generation
response = litellm.image_generation(
    prompt="A futuristic cityscape at sunset",
    model="dall-e-3",
    size="1024x1024",
    quality="hd"
)

image_url = response.data[0].url
print(f"Generated image: {image_url}")

# Multiple images
response = litellm.image_generation(
    prompt="A cute robot assistant",
    model="dall-e-2",
    n=4,
    size="512x512"
)

for i, image in enumerate(response.data):
    print(f"Image {i}: {image.url}")

# Base64 format
response = litellm.image_generation(
    prompt="Abstract art with vibrant colors",
    model="dall-e-3",
    response_format="b64_json"
)

image_data = response.data[0].b64_json
# Save base64 image data to file

Audio Transcription

# Transcribe audio file
with open("audio.mp3", "rb") as audio_file:
    response = litellm.transcription(
        model="whisper-1",
        file=audio_file,
        language="en",
        response_format="verbose_json",
        timestamp_granularities=["word", "segment"]
    )

print("Transcription:", response.text)
print("Language:", response.language)
print("Duration:", response.duration)

# Process segments with timestamps
for segment in response.segments:
    print(f"[{segment.start:.2f}-{segment.end:.2f}]: {segment.text}")

# Process individual words
for word in response.words:
    print(f"{word.word} ({word.start:.2f}-{word.end:.2f})")

# Different response formats
srt_response = litellm.transcription(
    model="whisper-1",
    file="audio.mp3",
    response_format="srt"
)
print("SRT format:", srt_response.text)

Speech Synthesis

# Generate speech from text
audio_data = litellm.speech(
    model="tts-1",
    input="Hello, this is a test of text-to-speech synthesis.",
    voice="alloy",
    response_format="mp3"
)

# Save audio to file
with open("output.mp3", "wb") as f:
    f.write(audio_data)

# Different voices and formats
voices = ["alloy", "echo", "fable", "onyx", "nova", "shimmer"]

for voice in voices:
    audio = litellm.speech(
        model="tts-1-hd",
        input=f"This is the {voice} voice.",
        voice=voice,
        response_format="wav",
        speed=1.2
    )
    
    with open(f"voice_{voice}.wav", "wb") as f:
        f.write(audio)

Content Moderation

# Single text moderation
response = litellm.moderation(
    input="This is a sample text to check for policy violations."
)

result = response.results[0]
if result.flagged:
    print("Content flagged for policy violation")
    for category, flagged in result.categories.__dict__.items():
        if flagged:
            score = getattr(result.category_scores, category)
            print(f"  {category}: {score:.3f}")

# Batch moderation
texts = [
    "First text to moderate",
    "Second text to moderate",
    "Third text to moderate"
]

response = litellm.moderation(input=texts)

for i, result in enumerate(response.results):
    print(f"Text {i}: {'FLAGGED' if result.flagged else 'OK'}")

OpenAI API Compatibility

LiteLLM provides complete compatibility with OpenAI's additional APIs including Assistants, Batch processing, Files, and Fine-tuning. These functions maintain identical signatures and behavior to OpenAI's API.

Assistants API

Create and manage AI assistants with custom instructions, knowledge bases, and tool capabilities.

def get_assistants(
    limit: Optional[int] = None,
    order: Optional[str] = None,
    after: Optional[str] = None,
    before: Optional[str] = None,
    **kwargs
) -> AssistantList:
    """List assistants in the organization."""

async def aget_assistants(**kwargs) -> AssistantList:
    """Async version of get_assistants."""

def create_assistants(
    model: str,
    name: Optional[str] = None,
    description: Optional[str] = None,
    instructions: Optional[str] = None,
    tools: Optional[List[Dict[str, Any]]] = None,
    tool_resources: Optional[Dict[str, Any]] = None,
    metadata: Optional[Dict[str, str]] = None,
    temperature: Optional[float] = None,
    top_p: Optional[float] = None,
    response_format: Optional[Dict[str, Any]] = None,
    **kwargs
) -> Assistant:
    """Create a new assistant."""

async def acreate_assistants(**kwargs) -> Assistant:
    """Async version of create_assistants."""

def delete_assistant(assistant_id: str, **kwargs) -> DeletionStatus:
    """Delete an assistant."""

async def adelete_assistant(assistant_id: str, **kwargs) -> DeletionStatus:
    """Async version of delete_assistant."""

def create_thread(
    messages: Optional[List[Dict[str, Any]]] = None,
    tool_resources: Optional[Dict[str, Any]] = None,
    metadata: Optional[Dict[str, str]] = None,
    **kwargs
) -> Thread:
    """Create a conversation thread."""

async def acreate_thread(**kwargs) -> Thread:
    """Async version of create_thread."""

def get_thread(thread_id: str, **kwargs) -> Thread:
    """Retrieve a thread by ID."""

async def aget_thread(thread_id: str, **kwargs) -> Thread:
    """Async version of get_thread."""

def add_message(
    thread_id: str,
    role: str,
    content: str,
    attachments: Optional[List[Dict[str, Any]]] = None,
    metadata: Optional[Dict[str, str]] = None,
    **kwargs
) -> ThreadMessage:
    """Add a message to a thread."""

async def a_add_message(**kwargs) -> ThreadMessage:
    """Async version of add_message."""

def get_messages(
    thread_id: str,
    limit: Optional[int] = None,
    order: Optional[str] = None,
    after: Optional[str] = None,
    before: Optional[str] = None,
    run_id: Optional[str] = None,
    **kwargs
) -> ThreadMessageList:
    """Get messages from a thread."""

async def aget_messages(**kwargs) -> ThreadMessageList:
    """Async version of get_messages."""

def run_thread(
    thread_id: str,
    assistant_id: str,
    model: Optional[str] = None,
    instructions: Optional[str] = None,
    additional_instructions: Optional[str] = None,
    additional_messages: Optional[List[Dict[str, Any]]] = None,
    tools: Optional[List[Dict[str, Any]]] = None,
    metadata: Optional[Dict[str, str]] = None,
    temperature: Optional[float] = None,
    top_p: Optional[float] = None,
    stream: Optional[bool] = None,
    max_prompt_tokens: Optional[int] = None,
    max_completion_tokens: Optional[int] = None,
    truncation_strategy: Optional[Dict[str, Any]] = None,
    tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
    parallel_tool_calls: Optional[bool] = None,
    response_format: Optional[Dict[str, Any]] = None,
    **kwargs
) -> Run:
    """Run an assistant on a thread."""

async def arun_thread(**kwargs) -> Run:
    """Async version of run_thread."""

def run_thread_stream(
    thread_id: str,
    assistant_id: str,
    **kwargs
) -> Iterator[AssistantEventHandler]:
    """Stream assistant run events."""

async def arun_thread_stream(**kwargs) -> AsyncIterator[AssistantEventHandler]:
    """Async version of run_thread_stream."""

Batch API

Process multiple requests in batches for cost-effective bulk operations.

def create_batch(
    input_file_id: str,
    endpoint: str,
    completion_window: str = "24h",
    metadata: Optional[Dict[str, str]] = None,
    **kwargs
) -> Batch:
    """Create a batch processing job."""

def retrieve_batch(batch_id: str, **kwargs) -> Batch:
    """Retrieve batch job status and results."""

def list_batches(
    after: Optional[str] = None,
    limit: Optional[int] = None,
    **kwargs
) -> BatchList:
    """List batch processing jobs."""

def cancel_batch(batch_id: str, **kwargs) -> Batch:
    """Cancel a batch processing job."""

Files API

Upload, manage, and process files for use with other OpenAI services.

def create_file(
    file: Union[str, bytes, IO],
    purpose: str,
    **kwargs
) -> FileObject:
    """Upload a file for use with other services."""

async def acreate_file(**kwargs) -> FileObject:
    """Async version of create_file."""

def file_retrieve(file_id: str, **kwargs) -> FileObject:
    """Retrieve file information."""

async def afile_retrieve(file_id: str, **kwargs) -> FileObject:
    """Async version of file_retrieve."""

def file_delete(file_id: str, **kwargs) -> DeletionStatus:
    """Delete a file."""

async def afile_delete(file_id: str, **kwargs) -> DeletionStatus:
    """Async version of file_delete."""

def file_list(
    purpose: Optional[str] = None,
    after: Optional[str] = None,
    limit: Optional[int] = None,
    order: Optional[str] = None,
    **kwargs
) -> FileList:
    """List uploaded files."""

async def afile_list(**kwargs) -> FileList:
    """Async version of file_list."""

def file_content(file_id: str, **kwargs) -> bytes:
    """Retrieve file content."""

async def afile_content(file_id: str, **kwargs) -> bytes:
    """Async version of file_content."""

Fine-tuning API

Create and manage fine-tuning jobs for custom model training.

def create_fine_tuning_job(
    training_file: str,
    model: str,
    hyperparameters: Optional[Dict[str, Any]] = None,
    suffix: Optional[str] = None,
    validation_file: Optional[str] = None,
    integrations: Optional[List[Dict[str, Any]]] = None,
    seed: Optional[int] = None,
    **kwargs
) -> FineTuningJob:
    """Create a fine-tuning job."""

async def acreate_fine_tuning_job(**kwargs) -> FineTuningJob:
    """Async version of create_fine_tuning_job."""

def cancel_fine_tuning_job(fine_tuning_job_id: str, **kwargs) -> FineTuningJob:
    """Cancel a fine-tuning job."""

async def acancel_fine_tuning_job(fine_tuning_job_id: str, **kwargs) -> FineTuningJob:
    """Async version of cancel_fine_tuning_job."""

def list_fine_tuning_jobs(
    after: Optional[str] = None,
    limit: Optional[int] = None,
    **kwargs
) -> FineTuningJobList:
    """List fine-tuning jobs."""

async def alist_fine_tuning_jobs(**kwargs) -> FineTuningJobList:
    """Async version of list_fine_tuning_jobs."""

def retrieve_fine_tuning_job(fine_tuning_job_id: str, **kwargs) -> FineTuningJob:
    """Retrieve fine-tuning job details."""

async def aretrieve_fine_tuning_job(fine_tuning_job_id: str, **kwargs) -> FineTuningJob:
    """Async version of retrieve_fine_tuning_job."""

OpenAI Compatibility Response Objects

class Assistant(OpenAIObject):
    """AI Assistant configuration"""
    id: str
    object: str = "assistant"
    created_at: int
    name: Optional[str]
    description: Optional[str]
    model: str
    instructions: Optional[str]
    tools: List[Dict[str, Any]]
    tool_resources: Optional[Dict[str, Any]]
    metadata: Dict[str, str]
    temperature: Optional[float]
    top_p: Optional[float]
    response_format: Optional[Dict[str, Any]]

class Thread(OpenAIObject):
    """Conversation thread"""
    id: str
    object: str = "thread"
    created_at: int
    tool_resources: Optional[Dict[str, Any]]
    metadata: Dict[str, str]

class ThreadMessage(OpenAIObject):
    """Message in a conversation thread"""
    id: str
    object: str = "thread.message"
    created_at: int
    assistant_id: Optional[str]
    thread_id: str
    run_id: Optional[str]
    role: str
    content: List[Dict[str, Any]]
    attachments: Optional[List[Dict[str, Any]]]
    metadata: Dict[str, str]

class Run(OpenAIObject):
    """Assistant run on a thread"""
    id: str
    object: str = "thread.run"
    created_at: int
    assistant_id: str
    thread_id: str
    status: str
    started_at: Optional[int]
    expires_at: Optional[int]
    cancelled_at: Optional[int]
    failed_at: Optional[int]
    completed_at: Optional[int]
    required_action: Optional[Dict[str, Any]]
    last_error: Optional[Dict[str, Any]]
    model: str
    instructions: str
    tools: List[Dict[str, Any]]
    metadata: Dict[str, str]
    usage: Optional[Usage]
    temperature: Optional[float]
    top_p: Optional[float]
    max_completion_tokens: Optional[int]
    max_prompt_tokens: Optional[int]
    truncation_strategy: Optional[Dict[str, Any]]
    incomplete_details: Optional[Dict[str, Any]]
    tool_choice: Optional[Union[str, Dict[str, Any]]]
    parallel_tool_calls: Optional[bool]
    response_format: Optional[Dict[str, Any]]

class Batch(OpenAIObject):
    """Batch processing job"""
    id: str
    object: str = "batch"
    endpoint: str
    errors: Optional[Dict[str, Any]]
    input_file_id: str
    completion_window: str
    status: str
    output_file_id: Optional[str]
    error_file_id: Optional[str]
    created_at: int
    in_progress_at: Optional[int]
    expires_at: Optional[int]
    finalizing_at: Optional[int]
    completed_at: Optional[int]
    failed_at: Optional[int]
    expired_at: Optional[int]
    cancelling_at: Optional[int]
    cancelled_at: Optional[int]
    request_counts: Optional[Dict[str, int]]
    metadata: Optional[Dict[str, str]]

class FileObject(OpenAIObject):
    """Uploaded file"""
    id: str
    object: str = "file"
    bytes: int
    created_at: int
    filename: str
    purpose: str
    status: Optional[str]
    status_details: Optional[str]

class FineTuningJob(OpenAIObject):
    """Fine-tuning job"""
    id: str
    object: str = "fine_tuning.job"
    created_at: int
    error: Optional[Dict[str, Any]]
    fine_tuned_model: Optional[str]
    finished_at: Optional[int]
    hyperparameters: Dict[str, Any]
    model: str
    organization_id: str
    result_files: List[str]
    seed: Optional[int]
    status: str
    trained_tokens: Optional[int]
    training_file: str
    validation_file: Optional[str]
    integrations: Optional[List[Dict[str, Any]]]
    estimated_finish: Optional[int]

class DeletionStatus(OpenAIObject):
    """Deletion confirmation"""
    id: str
    object: str
    deleted: bool

Document Reranking

# Rerank documents by relevance
query = "machine learning algorithms"
documents = [
    "Introduction to neural networks and deep learning",
    "Statistical methods in data analysis", 
    "Computer vision with convolutional networks",
    "Natural language processing fundamentals",
    "Supervised learning algorithms overview"
]

response = litellm.rerank(
    model="rerank-english-v3.0",
    query=query,
    documents=documents,
    top_n=3,
    return_documents=True
)

print("Reranked results:")
for result in response.results:
    doc_text = documents[result.index]
    print(f"Score: {result.relevance_score:.3f} - {doc_text}")

# Rerank with structured documents
documents = [
    {"id": "doc1", "title": "ML Overview", "content": "Machine learning fundamentals"},
    {"id": "doc2", "title": "Statistics", "content": "Statistical analysis methods"},
    {"id": "doc3", "title": "Deep Learning", "content": "Neural network architectures"}
]

response = litellm.rerank(
    model="rerank-english-v3.0",
    query="neural networks",
    documents=documents,
    return_documents=True
)

Async Operations

import asyncio

async def process_multiple_apis():
    # Concurrent API calls
    embedding_task = litellm.aembedding(
        model="text-embedding-ada-002",
        input="Text to embed"
    )
    
    image_task = litellm.aimage_generation(
        prompt="A beautiful landscape",
        model="dall-e-3"
    )
    
    transcription_task = litellm.atranscription(
        model="whisper-1",
        file="audio.mp3"
    )
    
    # Wait for all tasks to complete
    embedding_resp, image_resp, transcription_resp = await asyncio.gather(
        embedding_task, image_task, transcription_task
    )
    
    return {
        "embedding": embedding_resp.data[0].embedding,
        "image_url": image_resp.data[0].url,
        "transcription": transcription_resp.text
    }

results = asyncio.run(process_multiple_apis())

Provider-Specific Usage

# OpenAI embedding with specific dimensions
openai_embedding = litellm.embedding(
    model="text-embedding-3-large",
    input="Sample text",
    dimensions=1024  # Reduced dimensions
)

# Cohere embedding with specific input type
cohere_embedding = litellm.embedding(
    model="embed-english-v3.0",
    input="Sample text",
    custom_llm_provider="cohere",
    input_type="search_document"  # Cohere-specific parameter
)

# Azure OpenAI image generation
azure_image = litellm.image_generation(
    prompt="Digital art",
    model="azure/dall-e-3",
    api_base="https://my-resource.openai.azure.com/",
    api_version="2024-02-01",
    api_key="azure-api-key"
)

Install with Tessl CLI

npx tessl i tessl/pypi-litellm

docs

core-completion.md

exceptions.md

index.md

other-apis.md

providers.md

router.md

utilities.md

tile.json