CtrlK
BlogDocsLog inGet started
Tessl Logo

tessl/pypi-together

Python client for Together's Cloud Platform providing comprehensive AI model APIs

Overview
Eval results
Files

chat-completions.mddocs/

Chat Completions

Advanced conversational AI interface supporting text, image, and video inputs with streaming capabilities, comprehensive configuration options, and both synchronous and asynchronous operations.

Capabilities

Basic Chat Completion

Creates chat completions with conversational context and message history.

def create(
    *,
    messages: List[Dict[str, Any]],
    model: str,
    max_tokens: Optional[int] = None,
    stop: Optional[List[str]] = None,
    temperature: Optional[float] = None,
    top_p: Optional[float] = None,
    top_k: Optional[int] = None,
    repetition_penalty: Optional[float] = None,
    presence_penalty: Optional[float] = None,
    frequency_penalty: Optional[float] = None,
    min_p: Optional[float] = None,
    logit_bias: Optional[Dict[str, float]] = None,
    seed: Optional[int] = None,
    stream: bool = False,
    logprobs: Optional[int] = None,
    echo: Optional[bool] = None,
    n: Optional[int] = None,
    safety_model: Optional[str] = None,
    response_format: Optional[Dict[str, Any]] = None,
    tools: Optional[List[Dict[str, Any]]] = None,
    tool_choice: Optional[Union[str, Dict[str, Union[str, Dict[str, str]]]]] = None,
    **kwargs
) -> Union[ChatCompletionResponse, Iterator[ChatCompletionChunk]]:
    """
    Create a chat completion with conversational messages.

    Args:
        messages: List of message objects with role and content (Dict[str, Any])
        model: Model identifier for chat completion
        max_tokens: Maximum tokens to generate in response
        stop: List of stop sequences to end generation
        temperature: Sampling temperature (0.0 to 2.0)
        top_p: Nucleus sampling probability threshold
        top_k: Top-k sampling parameter
        repetition_penalty: Penalty for repeating tokens
        presence_penalty: Penalty for token presence (-2.0 to 2.0)
        frequency_penalty: Penalty for token frequency (-2.0 to 2.0)
        min_p: Minimum probability threshold for token consideration (0.0 to 1.0)
        logit_bias: Modify likelihood of specific tokens (-100 to 100)
        seed: Seed for reproducible generation
        stream: Enable streaming response chunks
        logprobs: Number of log probabilities to return
        echo: If true, echo the prompt back in the response along with its logprobs
        n: Number of completion choices to generate
        safety_model: Safety moderation model to apply
        response_format: Output format specification
        tools: List of tool definitions for function calling
        tool_choice: Control tool selection behavior

    Returns:
        ChatCompletionResponse or Iterator[ChatCompletionChunk] when streaming
    """

Multi-Modal Chat

Supports messages with text, images, and video content in conversational context.

def create(
    model: str,
    messages: List[Dict[str, Union[str, List[dict]]]],
    **kwargs
) -> ChatCompletionResponse:
    """
    Create multi-modal chat completions with images and video.

    Message content can be:
    - String for text-only messages
    - List of content objects for multi-modal messages
    
    Content object types:
    - {"type": "text", "text": str}
    - {"type": "image_url", "image_url": {"url": str}}
    - {"type": "video_url", "video_url": {"url": str}}
    """

Streaming Chat

Real-time streaming of chat completion responses as they are generated.

def create(
    model: str,
    messages: List[dict],
    stream: bool = True,
    **kwargs
) -> Iterator[ChatCompletionChunk]:
    """
    Stream chat completion chunks in real time.

    Returns:
        Iterator yielding ChatCompletionChunk objects
    """

Async Chat Completion

Asynchronous chat completion operations for concurrent processing.

async def create(
    model: str,
    messages: List[dict],
    **kwargs
) -> ChatCompletionResponse:
    """
    Asynchronously create chat completions.

    Returns:
        ChatCompletionResponse with generated content
    """

Usage Examples

Simple Text Chat

from together import Together

client = Together()

response = client.chat.completions.create(
    model="meta-llama/Llama-3.2-3B-Instruct-Turbo",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Explain quantum computing in simple terms."}
    ],
    max_tokens=300,
    temperature=0.7
)

print(response.choices[0].message.content)

Multi-Modal Chat with Image

response = client.chat.completions.create(
    model="meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
    messages=[{
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": "What's in this image?"
            },
            {
                "type": "image_url", 
                "image_url": {
                    "url": "https://example.com/image.jpg"
                }
            }
        ]
    }],
    max_tokens=200
)

print(response.choices[0].message.content)

Video Analysis

response = client.chat.completions.create(
    model="Qwen/Qwen2.5-VL-72B-Instruct",
    messages=[{
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": "Describe what happens in this video."
            },
            {
                "type": "video_url",
                "video_url": {
                    "url": "https://example.com/video.mp4"
                }
            }
        ]
    }],
    max_tokens=500
)

print(response.choices[0].message.content)

Streaming Chat

stream = client.chat.completions.create(
    model="meta-llama/Llama-3.2-3B-Instruct-Turbo",
    messages=[{"role": "user", "content": "Write a short story about AI"}],
    stream=True,
    max_tokens=500
)

for chunk in stream:
    if chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="", flush=True)

Async Chat Processing

import asyncio
from together import AsyncTogether

async def process_chats():
    client = AsyncTogether()
    
    messages_list = [
        [{"role": "user", "content": "Explain machine learning"}],
        [{"role": "user", "content": "What is deep learning?"}],
        [{"role": "user", "content": "How do neural networks work?"}]
    ]
    
    tasks = [
        client.chat.completions.create(
            model="meta-llama/Llama-3.2-3B-Instruct-Turbo",
            messages=messages,
            max_tokens=200
        )
        for messages in messages_list
    ]
    
    responses = await asyncio.gather(*tasks)
    
    for i, response in enumerate(responses):
        print(f"Response {i+1}: {response.choices[0].message.content}")

asyncio.run(process_chats())

Logprobs Analysis

response = client.chat.completions.create(
    model="meta-llama/Llama-3.2-3B-Instruct-Turbo",
    messages=[{"role": "user", "content": "The capital of France is"}],
    logprobs=3,
    max_tokens=10
)

logprobs_data = response.choices[0].logprobs
for token, logprob in zip(logprobs_data.tokens, logprobs_data.token_logprobs):
    print(f"Token: '{token}', Log Probability: {logprob}")

Types

Request Types

class ChatCompletionRequest:
    model: str
    messages: List[dict]
    max_tokens: Optional[int] = None
    temperature: Optional[float] = None
    top_p: Optional[float] = None
    top_k: Optional[int] = None
    repetition_penalty: Optional[float] = None
    stream: bool = False
    logprobs: Optional[int] = None
    echo: Optional[bool] = None
    n: Optional[int] = None
    presence_penalty: Optional[float] = None
    frequency_penalty: Optional[float] = None
    logit_bias: Optional[Dict[str, float]] = None
    stop: Optional[Union[str, List[str]]] = None
    safety_model: Optional[str] = None

Response Types

class ChatCompletionResponse:
    id: str
    object: str
    created: int
    model: str
    choices: List[ChatChoice]
    usage: Usage
    
class ChatChoice:
    index: int
    message: ChatMessage
    finish_reason: Optional[str]
    logprobs: Optional[Logprobs]

class ChatMessage:
    role: str
    content: Optional[str]
    
class Usage:
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int

class Logprobs:
    tokens: List[str]
    token_logprobs: List[Optional[float]]
    top_logprobs: Optional[List[Dict[str, float]]]

Streaming Types

class ChatCompletionChunk:
    id: str
    object: str
    created: int
    model: str
    choices: List[ChatChoiceDelta]

class ChatChoiceDelta:
    index: int
    delta: ChatDelta
    finish_reason: Optional[str]

class ChatDelta:
    role: Optional[str]
    content: Optional[str]

Message Content Types

class TextContent:
    type: Literal["text"]
    text: str

class ImageContent:
    type: Literal["image_url"]
    image_url: ImageUrl

class VideoContent:
    type: Literal["video_url"]
    video_url: VideoUrl

class ImageUrl:
    url: str
    detail: Optional[Literal["low", "high", "auto"]] = None

class VideoUrl:
    url: str

Install with Tessl CLI

npx tessl i tessl/pypi-together

docs

audio.md

batch.md

chat-completions.md

code-interpreter.md

completions.md

embeddings.md

endpoints.md

evaluation.md

files.md

fine-tuning.md

images.md

index.md

models.md

rerank.md

tile.json