Python client for Together's Cloud Platform providing comprehensive AI model APIs
Large-scale inference jobs with 24-hour turnaround time for processing thousands of requests efficiently and cost-effectively. Ideal for non-real-time processing of large datasets with significant cost savings compared to real-time API calls.
Submit a batch inference job for processing multiple requests.
def create_batch(
    file_id: str,
    endpoint: str
) -> "BatchJob":
    """
    Create a batch processing job.

    Args:
        file_id: ID of uploaded JSONL file containing requests
        endpoint: API endpoint for batch processing (e.g., "/v1/chat/completions")

    Returns:
        BatchJob with job information and status
    """
# Retrieve the status and details of a batch job.
# NOTE(review): the parameter name shadows the `id` builtin, but it is part
# of the published API signature, so it is kept as-is.
def get_batch(id: str) -> "BatchJob":
    """
    Get batch job status and details.

    Args:
        id: Batch job identifier

    Returns:
        BatchJob with current status and metadata
    """
# List all batch jobs with their statuses.
def list_batches() -> "List[BatchJob]":
    """
    List all batch jobs.

    Returns:
        List of BatchJob objects
    """
# All batch operations support asynchronous execution.
# Async variants mirror the synchronous batch API one-to-one.
async def create_batch(file_id: str, endpoint: str) -> "BatchJob": ...
async def get_batch(id: str) -> "BatchJob": ...
async def list_batches() -> "List[BatchJob]": ...

from together import Together
import json

client = Together()

# Assemble the individual requests that make up this batch.
first_request = {
    "custom_id": "request-1",
    "method": "POST",
    "url": "/v1/chat/completions",
    "body": {
        "model": "meta-llama/Llama-3.2-3B-Instruct-Turbo",
        "messages": [{"role": "user", "content": "What is AI?"}],
        "max_tokens": 100
    }
}
second_request = {
    "custom_id": "request-2",
    "method": "POST",
    "url": "/v1/chat/completions",
    "body": {
        "model": "meta-llama/Llama-3.2-3B-Instruct-Turbo",
        "messages": [{"role": "user", "content": "Explain machine learning"}],
        "max_tokens": 100
    }
}
batch_requests = [first_request, second_request]

# Serialize the requests as JSONL: one JSON document per line.
with open("batch_requests.jsonl", "w") as jsonl_file:
    jsonl_file.writelines(json.dumps(item) + "\n" for item in batch_requests)

# Upload the request file so the batch service can read it.
batch_file = client.files.upload(
    file="batch_requests.jsonl",
    purpose="batch-api"
)

# Submit the batch job against the chat completions endpoint.
batch = client.batches.create_batch(
    file_id=batch_file.id,
    endpoint="/v1/chat/completions"
)

print(f"Batch job created: {batch.id}")
print(f"Status: {batch.status}")

import time
def monitor_batch_job(client: "Together", batch_id: str):
    """Poll a batch job once per minute until it reaches a terminal state.

    Args:
        client: Together client used for polling.
        batch_id: Identifier of the batch job to monitor.

    Returns:
        The final BatchJob once it is completed, failed, or cancelled.
    """
    # Terminal states mapped to their completion messages. The comparison is
    # case-insensitive because BatchJobStatus defines lowercase values
    # ("completed", "failed", ...) while some examples use uppercase strings;
    # a lowercase status would otherwise never match and loop forever.
    terminal_messages = {
        "COMPLETED": "Batch processing completed!",
        "FAILED": "Batch processing failed!",
        "CANCELLED": "Batch processing was cancelled!",
    }
    while True:
        batch = client.batches.get_batch(batch_id)
        print(f"Status: {batch.status}")
        status = str(batch.status).upper()
        if status in terminal_messages:
            print(terminal_messages[status])
            return batch
        time.sleep(60)  # Check every minute
# Monitor the batch job (blocks, polling once per minute, until terminal).
completed_batch = monitor_batch_job(client, batch.id)
if completed_batch.status == "COMPLETED":
    # Download results
    # retrieve_content saves the batch output to the given local path.
    output_content = client.files.retrieve_content(
        id=completed_batch.output_file_id,
        output="batch_results.jsonl"
    )
print("Results downloaded to batch_results.jsonl")

def create_large_batch_job(texts: list, model: str, prompt_template: str,
                           max_tokens: int = 200,
                           temperature: float = 0.3) -> str:
    """Write a JSONL batch-request file covering every input text.

    Args:
        texts: Input documents; each is substituted into prompt_template.
        model: Model identifier to run every request against.
        prompt_template: Template containing a ``{text}`` placeholder.
        max_tokens: Per-request completion budget (default 200).
        temperature: Sampling temperature for every request (default 0.3).

    Returns:
        Name of the JSONL file that was written.
    """
    batch_requests = [
        {
            "custom_id": f"text-{i}",
            "method": "POST",
            "url": "/v1/chat/completions",
            "body": {
                "model": model,
                "messages": [
                    {"role": "user", "content": prompt_template.format(text=text)}
                ],
                "max_tokens": max_tokens,
                "temperature": temperature
            }
        }
        for i, text in enumerate(texts)
    ]
    # Save to file: one JSON document per line (JSONL).
    filename = f"batch_texts_{len(texts)}.jsonl"
    with open(filename, "w") as f:
        f.writelines(json.dumps(request) + "\n" for request in batch_requests)
    return filename
# Process 1000 documents
documents = [f"Document content {i}" for i in range(1000)]
prompt = "Summarize the following document in one sentence: {text}"
# Build the JSONL request file covering all documents.
batch_file_path = create_large_batch_job(
    texts=documents,
    model="meta-llama/Llama-3.2-3B-Instruct-Turbo",
    prompt_template=prompt
)
# Upload and process
batch_file = client.files.upload(file=batch_file_path, purpose="batch-api")
batch_job = client.batches.create_batch(
    file_id=batch_file.id,
    endpoint="/v1/chat/completions"
)
print(f"Created batch job for {len(documents)} documents: {batch_job.id}")

def process_batch_results(results_file: str):
    """Load a JSONL batch-results file and summarize successes and failures.

    Args:
        results_file: Path to the JSONL results file downloaded from a batch job.

    Returns:
        List of {"id", "response"} dicts for the successful requests.
    """
    # Parse one JSON document per line; skip blank lines (a trailing newline
    # in the downloaded file would otherwise crash json.loads).
    with open(results_file, "r") as f:
        results = [json.loads(line) for line in f if line.strip()]

    # Analyze results: a record carries either a "response" or an "error".
    successful = [r for r in results if r.get("response")]
    failed = [r for r in results if r.get("error")]
    print(f"Total results: {len(results)}")
    print(f"Successful: {len(successful)}")
    print(f"Failed: {len(failed)}")

    # Extract the assistant message text from each successful response.
    responses = [
        {
            "id": result["custom_id"],
            "response": result["response"]["body"]["choices"][0]["message"]["content"]
        }
        for result in successful
    ]
    return responses
# Process results after batch completion
if completed_batch.status == "COMPLETED":
    responses = process_batch_results("batch_results.jsonl")
    print("Sample responses:")
    # Show the first five answers, truncated to 100 characters each.
    for response in responses[:5]:
        print(f"{response['id']}: {response['response'][:100]}...")

# Job record returned by the batch endpoints.
class BatchJob:
    id: str                                   # batch job identifier
    object: str                               # API object type tag
    endpoint: str                             # endpoint the batched requests target
    errors: Optional[Dict[str, Any]]          # populated when the job has errors
    input_file_id: str                        # uploaded JSONL request file
    completion_window: str                    # turnaround window (e.g. 24h)
    status: str                               # lifecycle state; see BatchJobStatus
    output_file_id: Optional[str]             # results file, set once completed
    error_file_id: Optional[str]              # per-request error file, if any
    # NOTE(review): the *_at fields look like Unix epoch timestamps (int) —
    # confirm against the API reference.
    created_at: int
    in_progress_at: Optional[int]
    expires_at: Optional[int]
    finalizing_at: Optional[int]
    completed_at: Optional[int]
    failed_at: Optional[int]
    expired_at: Optional[int]
    cancelling_at: Optional[int]
    cancelled_at: Optional[int]
    request_counts: Optional[Dict[str, int]]  # presumably total/completed/failed tallies — verify
class BatchJobStatus:
    """String constants for the lifecycle states of a batch job."""
    VALIDATING = "validating"    # input file is being checked
    FAILED = "failed"            # terminal: job could not be processed
    IN_PROGRESS = "in_progress"  # requests are being executed
    FINALIZING = "finalizing"    # results are being assembled
    COMPLETED = "completed"      # terminal: results are ready
    EXPIRED = "expired"          # terminal: job exceeded its window
    CANCELLING = "cancelling"    # cancellation requested, not yet final
    CANCELLED = "cancelled"      # terminal: job was cancelled
class BatchEndpoint:
    """API endpoints that accept batch processing requests."""
    CHAT_COMPLETIONS = "/v1/chat/completions"
    COMPLETIONS = "/v1/completions"
    EMBEDDINGS = "/v1/embeddings"

# Batch request files must be in JSONL format with each line containing:
class BatchRequest:
    """Shape of a single line in a JSONL batch request file."""
    custom_id: str  # Unique identifier for the request
    method: str     # HTTP method (usually "POST")
    url: str        # API endpoint URL
    body: dict      # Request body parameters

# Install with Tessl CLI
npx tessl i tessl/pypi-together