Python client for Together's Cloud Platform providing comprehensive AI model APIs
Advanced conversational AI interface supporting text, image, and video inputs with streaming capabilities, comprehensive configuration options, and both synchronous and asynchronous operations.
Creates chat completions with conversational context and message history.
# NOTE(review): API spec stub — signature and contract only; no implementation
# body is present in this extract.
def create(
*,
messages: List[Dict[str, Any]],
model: str,
max_tokens: Optional[int] = None,
stop: Optional[List[str]] = None,
temperature: Optional[float] = None,
top_p: Optional[float] = None,
top_k: Optional[int] = None,
repetition_penalty: Optional[float] = None,
presence_penalty: Optional[float] = None,
frequency_penalty: Optional[float] = None,
min_p: Optional[float] = None,
logit_bias: Optional[Dict[str, float]] = None,
seed: Optional[int] = None,
stream: bool = False,
logprobs: Optional[int] = None,
echo: Optional[bool] = None,
n: Optional[int] = None,
safety_model: Optional[str] = None,
response_format: Optional[Dict[str, Any]] = None,
tools: Optional[List[Dict[str, Any]]] = None,
tool_choice: Optional[Union[str, Dict[str, Union[str, Dict[str, str]]]]] = None,
**kwargs
) -> Union[ChatCompletionResponse, Iterator[ChatCompletionChunk]]:
"""
Create a chat completion with conversational messages.

All parameters are keyword-only (note the bare ``*`` in the signature).

Args:
messages: List of message objects with role and content (Dict[str, Any])
model: Model identifier for chat completion
max_tokens: Maximum tokens to generate in response
stop: List of stop sequences to end generation
temperature: Sampling temperature (0.0 to 2.0)
top_p: Nucleus sampling probability threshold
top_k: Top-k sampling parameter
repetition_penalty: Penalty for repeating tokens
presence_penalty: Penalty for token presence (-2.0 to 2.0)
frequency_penalty: Penalty for token frequency (-2.0 to 2.0)
min_p: Minimum percentage for token consideration (0.0 to 1.0)
logit_bias: Modify likelihood of specific tokens (-100 to 100)
seed: Seed for reproducible generation
stream: Enable streaming response chunks
logprobs: Number of log probabilities to return
echo: Include prompt in response with logprobs
n: Number of completion choices to generate
safety_model: Safety moderation model to apply
response_format: Output format specification
tools: List of tool definitions for function calling
tool_choice: Control tool selection behavior
**kwargs: Additional options passed through to the API unchanged

Returns:
ChatCompletionResponse when stream=False, or an
Iterator[ChatCompletionChunk] when stream=True
"""Supports messages with text, images, and video content in conversational context.
# NOTE(review): spec stub — multi-modal (image/video) variant of
# chat.completions.create; additional options are accepted via **kwargs.
def create(
model: str,
messages: List[Dict[str, Union[str, List[dict]]]],
**kwargs
) -> ChatCompletionResponse:
"""
Create multi-modal chat completions with images and video.

Message content can be:
- String for text-only messages
- List of content objects for multi-modal messages

Content object types:
- {"type": "text", "text": str}
- {"type": "image_url", "image_url": {"url": str}}
- {"type": "video_url", "video_url": {"url": str}}
"""Real-time streaming of chat completion responses as they are generated.
# NOTE(review): spec stub — streaming variant; stream defaults to True here,
# whereas the full signature above defaults it to False.
def create(
model: str,
messages: List[dict],
stream: bool = True,
**kwargs
) -> Iterator[ChatCompletionChunk]:
"""
Stream chat completion chunks in real-time.

Returns:
Iterator yielding ChatCompletionChunk objects as they are generated
"""Asynchronous chat completion operations for concurrent processing.
# NOTE(review): spec stub — coroutine variant used by the async client; the
# result must be awaited (see the AsyncTogether example below).
async def create(
model: str,
messages: List[dict],
**kwargs
) -> ChatCompletionResponse:
"""
Asynchronously create chat completions.

Returns:
ChatCompletionResponse with generated content
"""from together import Together
# Example: basic synchronous text chat with a system prompt and a user turn.
client = Together()
response = client.chat.completions.create(
model="meta-llama/Llama-3.2-3B-Instruct-Turbo",
messages=[
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "Explain quantum computing in simple terms."}
],
max_tokens=300,
temperature=0.7
)
print(response.choices[0].message.content)

response = client.chat.completions.create(
model="meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
messages=[{
"role": "user",
"content": [
{
"type": "text",
"text": "What's in this image?"
},
{
"type": "image_url",
"image_url": {
"url": "https://example.com/image.jpg"
}
}
]
}],
max_tokens=200
)
print(response.choices[0].message.content)

response = client.chat.completions.create(
model="Qwen/Qwen2.5-VL-72B-Instruct",
messages=[{
"role": "user",
"content": [
{
"type": "text",
"text": "Describe what happens in this video."
},
{
"type": "video_url",
"video_url": {
"url": "https://example.com/video.mp4"
}
}
]
}],
max_tokens=500
)
print(response.choices[0].message.content)

stream = client.chat.completions.create(
model="meta-llama/Llama-3.2-3B-Instruct-Turbo",
messages=[{"role": "user", "content": "Write a short story about AI"}],
stream=True,
max_tokens=500
)
for chunk in stream:
if chunk.choices[0].delta.content:
print(chunk.choices[0].delta.content, end="", flush=True)

import asyncio
# Example: fan out several independent completions concurrently with the
# AsyncTogether client and asyncio.gather.
from together import AsyncTogether
async def process_chats():
client = AsyncTogether()
messages_list = [
[{"role": "user", "content": "Explain machine learning"}],
[{"role": "user", "content": "What is deep learning?"}],
[{"role": "user", "content": "How do neural networks work?"}]
]
tasks = [
client.chat.completions.create(
model="meta-llama/Llama-3.2-3B-Instruct-Turbo",
messages=messages,
max_tokens=200
)
for messages in messages_list
]
responses = await asyncio.gather(*tasks)
for i, response in enumerate(responses):
print(f"Response {i+1}: {response.choices[0].message.content}")
asyncio.run(process_chats())

response = client.chat.completions.create(
model="meta-llama/Llama-3.2-3B-Instruct-Turbo",
messages=[{"role": "user", "content": "The capital of France is"}],
logprobs=3,
max_tokens=10
)
logprobs_data = response.choices[0].logprobs
for token, logprob in zip(logprobs_data.tokens, logprobs_data.token_logprobs):
print(f"Token: '{token}', Log Probability: {logprob}")class ChatCompletionRequest:
# Fields of ChatCompletionRequest (class header is on the preceding line).
# Only model and messages lack defaults; everything else is optional tuning.
model: str
messages: List[dict]
max_tokens: Optional[int] = None
temperature: Optional[float] = None
top_p: Optional[float] = None
top_k: Optional[int] = None
repetition_penalty: Optional[float] = None
stream: bool = False
logprobs: Optional[int] = None
echo: Optional[bool] = None
n: Optional[int] = None
presence_penalty: Optional[float] = None
frequency_penalty: Optional[float] = None
logit_bias: Optional[Dict[str, float]] = None
stop: Optional[Union[str, List[str]]] = None  # NOTE(review): also allows a bare str, unlike create()'s List[str] — confirm intended
safety_model: Optional[str] = None

class ChatCompletionResponse:
# Fields of ChatCompletionResponse (class header is on the preceding line),
# followed by the nested response data models.
id: str
object: str
created: int
model: str
choices: List[ChatChoice]
usage: Usage
# A single generated completion choice (multiple when n > 1 was requested).
class ChatChoice:
index: int
message: ChatMessage
finish_reason: Optional[str]
logprobs: Optional[Logprobs]
# The message carried by a choice.
class ChatMessage:
role: str
content: Optional[str]
# Token accounting for the request/response pair.
class Usage:
prompt_tokens: int
completion_tokens: int
total_tokens: int
# Per-token log-probability data (populated when logprobs was requested).
class Logprobs:
tokens: List[str]
token_logprobs: List[Optional[float]]
top_logprobs: Optional[List[Dict[str, float]]]

class ChatCompletionChunk:
# Fields of ChatCompletionChunk, the streaming event type (class header is on
# the preceding line), followed by its nested delta models.
id: str
object: str
created: int
model: str
choices: List[ChatChoiceDelta]
# A streamed choice carrying an incremental delta.
class ChatChoiceDelta:
index: int
delta: ChatDelta
finish_reason: Optional[str]
# Incremental message fragment delivered per chunk; fields may be absent (None).
class ChatDelta:
role: Optional[str]
content: Optional[str]

class TextContent:
type: Literal["text"]
text: str
class ImageContent:
type: Literal["image_url"]
image_url: ImageUrl
class VideoContent:
type: Literal["video_url"]
video_url: VideoUrl
class ImageUrl:
url: str
detail: Optional[Literal["low", "high", "auto"]] = None
class VideoUrl:
url: str

Install with Tessl CLI
npx tessl i tessl/pypi-together