# Batch Processing

Large-scale inference jobs with 24-hour turnaround time for processing thousands of requests efficiently and cost-effectively. Ideal for non-real-time processing of large datasets with significant cost savings compared to real-time API calls.

## Capabilities

### Create Batch Job

Submit a batch inference job for processing multiple requests.

```python { .api }
def create_batch(
    file_id: str,
    endpoint: str
) -> BatchJob:
    """
    Create a batch processing job.

    Args:
        file_id: ID of uploaded JSONL file containing requests
        endpoint: API endpoint for batch processing (e.g., "/v1/chat/completions")

    Returns:
        BatchJob with job information and status
    """
```

### Get Batch Status

Retrieve the status and details of a batch job.

```python { .api }
def get_batch(id: str) -> BatchJob:
    """
    Get batch job status and details.

    Args:
        id: Batch job identifier

    Returns:
        BatchJob with current status and metadata
    """
```

### List Batch Jobs

List all batch jobs with their statuses.

```python { .api }
def list_batches() -> List[BatchJob]:
    """
    List all batch jobs.

    Returns:
        List of BatchJob objects
    """
```

### Async Batch Operations

All batch operations support asynchronous execution.

```python { .api }
async def create_batch(file_id: str, endpoint: str) -> BatchJob: ...
async def get_batch(id: str) -> BatchJob: ...
async def list_batches() -> List[BatchJob]: ...
```

## Usage Examples

### Basic Batch Processing

```python
from together import Together
import json

client = Together()

# Prepare batch requests file
batch_requests = [
    {
        "custom_id": "request-1",
        "method": "POST",
        "url": "/v1/chat/completions",
        "body": {
            "model": "meta-llama/Llama-3.2-3B-Instruct-Turbo",
            "messages": [{"role": "user", "content": "What is AI?"}],
            "max_tokens": 100
        }
    },
    {
        "custom_id": "request-2",
        "method": "POST",
        "url": "/v1/chat/completions",
        "body": {
            "model": "meta-llama/Llama-3.2-3B-Instruct-Turbo",
            "messages": [{"role": "user", "content": "Explain machine learning"}],
            "max_tokens": 100
        }
    }
]

# Save batch requests to JSONL file
with open("batch_requests.jsonl", "w") as f:
    for request in batch_requests:
        f.write(json.dumps(request) + "\n")

# Upload batch file
batch_file = client.files.upload(
    file="batch_requests.jsonl",
    purpose="batch-api"
)

# Create batch job
batch = client.batches.create_batch(
    file_id=batch_file.id,
    endpoint="/v1/chat/completions"
)

print(f"Batch job created: {batch.id}")
print(f"Status: {batch.status}")
```

### Monitor Batch Progress

```python
import time

def monitor_batch_job(client: Together, batch_id: str):
    """Monitor batch job until completion."""

    while True:
        batch = client.batches.get_batch(batch_id)
        print(f"Status: {batch.status}")

        if batch.status == "COMPLETED":
            print("Batch processing completed!")
            return batch
        elif batch.status == "FAILED":
            print("Batch processing failed!")
            return batch
        elif batch.status == "CANCELLED":
            print("Batch processing was cancelled!")
            return batch

        time.sleep(60)  # Check every minute

# Monitor the batch job
completed_batch = monitor_batch_job(client, batch.id)

if completed_batch.status == "COMPLETED":
    # Download results
    output_content = client.files.retrieve_content(
        id=completed_batch.output_file_id,
        output="batch_results.jsonl"
    )
    print("Results downloaded to batch_results.jsonl")
```

### Large-Scale Text Processing

```python
def create_large_batch_job(texts: list, model: str, prompt_template: str):
    """Create a batch job for processing many texts."""

    batch_requests = []

    for i, text in enumerate(texts):
        request = {
            "custom_id": f"text-{i}",
            "method": "POST",
            "url": "/v1/chat/completions",
            "body": {
                "model": model,
                "messages": [
                    {"role": "user", "content": prompt_template.format(text=text)}
                ],
                "max_tokens": 200,
                "temperature": 0.3
            }
        }
        batch_requests.append(request)

    # Save to file
    filename = f"batch_texts_{len(texts)}.jsonl"
    with open(filename, "w") as f:
        for request in batch_requests:
            f.write(json.dumps(request) + "\n")

    return filename

# Process 1000 documents
documents = [f"Document content {i}" for i in range(1000)]
prompt = "Summarize the following document in one sentence: {text}"

batch_file_path = create_large_batch_job(
    texts=documents,
    model="meta-llama/Llama-3.2-3B-Instruct-Turbo",
    prompt_template=prompt
)

# Upload and process
batch_file = client.files.upload(file=batch_file_path, purpose="batch-api")
batch_job = client.batches.create_batch(
    file_id=batch_file.id,
    endpoint="/v1/chat/completions"
)

print(f"Created batch job for {len(documents)} documents: {batch_job.id}")
```

### Process Batch Results

```python
def process_batch_results(results_file: str):
    """Process and analyze batch job results."""

    results = []

    with open(results_file, "r") as f:
        for line in f:
            result = json.loads(line)
            results.append(result)

    # Analyze results
    successful = [r for r in results if r.get("response")]
    failed = [r for r in results if r.get("error")]

    print(f"Total results: {len(results)}")
    print(f"Successful: {len(successful)}")
    print(f"Failed: {len(failed)}")

    # Extract responses
    responses = []
    for result in successful:
        custom_id = result["custom_id"]
        content = result["response"]["body"]["choices"][0]["message"]["content"]
        responses.append({
            "id": custom_id,
            "response": content
        })

    return responses

# Process results after batch completion
if completed_batch.status == "COMPLETED":
    responses = process_batch_results("batch_results.jsonl")

    print("Sample responses:")
    for response in responses[:5]:
        print(f"{response['id']}: {response['response'][:100]}...")
```

## Types

### Batch Job Types

```python { .api }
class BatchJob:
    id: str
    object: str
    endpoint: str
    errors: Optional[Dict[str, Any]]
    input_file_id: str
    completion_window: str
    status: str
    output_file_id: Optional[str]
    error_file_id: Optional[str]
    created_at: int
    in_progress_at: Optional[int]
    expires_at: Optional[int]
    finalizing_at: Optional[int]
    completed_at: Optional[int]
    failed_at: Optional[int]
    expired_at: Optional[int]
    cancelling_at: Optional[int]
    cancelled_at: Optional[int]
    request_counts: Optional[Dict[str, int]]

class BatchJobStatus:
    VALIDATING = "VALIDATING"
    FAILED = "FAILED"
    IN_PROGRESS = "IN_PROGRESS"
    FINALIZING = "FINALIZING"
    COMPLETED = "COMPLETED"
    EXPIRED = "EXPIRED"
    CANCELLING = "CANCELLING"
    CANCELLED = "CANCELLED"

class BatchEndpoint:
    CHAT_COMPLETIONS = "/v1/chat/completions"
    COMPLETIONS = "/v1/completions"
    EMBEDDINGS = "/v1/embeddings"
```

## Request Format

Batch request files must be in JSONL format with each line containing:

```python { .api }
class BatchRequest:
    custom_id: str  # Unique identifier for the request
    method: str     # HTTP method (usually "POST")
    url: str        # API endpoint URL
    body: dict      # Request body parameters
```

## Cost Benefits

- **50% cost reduction** compared to real-time API calls
- **24-hour processing window** for non-urgent tasks
- **Bulk processing** of up to 50MB of requests per job
- **Automatic retry** handling for failed requests