or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

audio.md · batch.md · chat-completions.md · code-interpreter.md · completions.md · embeddings.md · endpoints.md · evaluation.md · files.md · fine-tuning.md · images.md · index.md · models.md · rerank.md

docs/completions.md
# Text Completions

Raw text completion interface for code generation, creative writing, and general text completion tasks. Supports streaming, async operations, and extensive configuration options for fine-tuned control over text generation.

## Capabilities

### Basic Text Completion

Generate text continuations from a given prompt with configurable generation parameters.

```python { .api }
def create(
    model: str,
    prompt: str,
    max_tokens: Optional[int] = None,
    temperature: Optional[float] = None,
    top_p: Optional[float] = None,
    top_k: Optional[int] = None,
    repetition_penalty: Optional[float] = None,
    stream: bool = False,
    logprobs: Optional[int] = None,
    echo: Optional[bool] = None,
    n: Optional[int] = None,
    presence_penalty: Optional[float] = None,
    frequency_penalty: Optional[float] = None,
    logit_bias: Optional[Dict[str, float]] = None,
    stop: Optional[Union[str, List[str]]] = None,
    safety_model: Optional[str] = None,
    **kwargs
) -> CompletionResponse:
    """
    Create a text completion from a prompt.

    Args:
        model: Model identifier for text completion
        prompt: Input text prompt to complete
        max_tokens: Maximum tokens to generate
        temperature: Sampling temperature (0.0 to 2.0)
        top_p: Nucleus sampling probability threshold
        top_k: Top-k sampling parameter
        repetition_penalty: Penalty for repeating tokens
        stream: Enable streaming response chunks
        logprobs: Number of log probabilities to return
        echo: Include prompt in response
        n: Number of completion choices to generate
        presence_penalty: Penalty for token presence
        frequency_penalty: Penalty for token frequency
        logit_bias: Modify likelihood of specific tokens
        stop: Stop sequences to end generation
        safety_model: Safety model to apply

    Returns:
        CompletionResponse with generated text
    """
```

### Streaming Completion

Real-time streaming of text completion as it is generated.

```python { .api }
def create(
    model: str,
    prompt: str,
    stream: bool = True,
    **kwargs
) -> Iterator[CompletionChunk]:
    """
    Stream text completion chunks in real-time.

    Returns:
        Iterator yielding CompletionChunk objects
    """
```

### Async Text Completion

Asynchronous text completion operations for concurrent processing.

```python { .api }
async def create(
    model: str,
    prompt: str,
    **kwargs
) -> CompletionResponse:
    """
    Asynchronously create text completions.

    Returns:
        CompletionResponse with generated text
    """
```

## Usage Examples

### Basic Code Completion

```python
from together import Together

client = Together()

response = client.completions.create(
    model="codellama/CodeLlama-34b-Python-hf",
    prompt="def fibonacci(n):",
    max_tokens=200,
    temperature=0.1,
    stop=["\n\n", "def "]
)

print(response.choices[0].text)
```

### Creative Writing

```python
response = client.completions.create(
    model="meta-llama/Llama-3.2-3B-Instruct-Turbo",
    prompt="Once upon a time in a magical forest,",
    max_tokens=300,
    temperature=0.8,
    top_p=0.9
)

print(response.choices[0].text)
```

### Streaming Completion

```python
stream = client.completions.create(
    model="codellama/CodeLlama-34b-Python-hf",
    prompt="# Python function to calculate prime numbers\ndef is_prime(n):",
    stream=True,
    max_tokens=150,
    temperature=0.2
)

for chunk in stream:
    if chunk.choices[0].text:
        print(chunk.choices[0].text, end="", flush=True)
```

### Multiple Completions

```python
response = client.completions.create(
    model="meta-llama/Llama-3.2-3B-Instruct-Turbo",
    prompt="The benefits of renewable energy include:",
    max_tokens=100,
    temperature=0.7,
    n=3  # Generate 3 different completions
)

for i, choice in enumerate(response.choices):
    print(f"Completion {i+1}: {choice.text}")
```

### Async Batch Processing

```python
import asyncio
from together import AsyncTogether

async def process_completions():
    client = AsyncTogether()

    prompts = [
        "Write a Python function to sort a list:",
        "Explain the concept of recursion:",
        "Create a simple web scraper in Python:"
    ]

    tasks = [
        client.completions.create(
            model="codellama/CodeLlama-34b-Python-hf",
            prompt=prompt,
            max_tokens=200,
            temperature=0.3
        )
        for prompt in prompts
    ]

    responses = await asyncio.gather(*tasks)

    for i, response in enumerate(responses):
        print(f"Response {i+1}: {response.choices[0].text}")

asyncio.run(process_completions())
```

### Advanced Configuration

```python
response = client.completions.create(
    model="meta-llama/Llama-3.2-3B-Instruct-Turbo",
    prompt="Artificial intelligence will",
    max_tokens=150,
    temperature=0.7,
    top_p=0.9,
    top_k=50,
    repetition_penalty=1.1,
    presence_penalty=0.2,
    frequency_penalty=0.1,
    stop=[".", "!", "?"],
    logprobs=5
)

print(f"Generated text: {response.choices[0].text}")
print(f"Finish reason: {response.choices[0].finish_reason}")

if response.choices[0].logprobs:
    print("Token probabilities:")
    for token, logprob in zip(
        response.choices[0].logprobs.tokens,
        response.choices[0].logprobs.token_logprobs
    ):
        print(f"  '{token}': {logprob}")
```

## Types

### Request Types

```python { .api }
class CompletionRequest:
    model: str
    prompt: str
    max_tokens: Optional[int] = None
    temperature: Optional[float] = None
    top_p: Optional[float] = None
    top_k: Optional[int] = None
    repetition_penalty: Optional[float] = None
    stream: bool = False
    logprobs: Optional[int] = None
    echo: Optional[bool] = None
    n: Optional[int] = None
    presence_penalty: Optional[float] = None
    frequency_penalty: Optional[float] = None
    logit_bias: Optional[Dict[str, float]] = None
    stop: Optional[Union[str, List[str]]] = None
    safety_model: Optional[str] = None
```

### Response Types

```python { .api }
class CompletionResponse:
    id: str
    object: str
    created: int
    model: str
    choices: List[CompletionChoice]
    usage: Usage

class CompletionChoice:
    index: int
    text: str
    finish_reason: Optional[str]
    logprobs: Optional[Logprobs]

class Usage:
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int

class Logprobs:
    tokens: List[str]
    token_logprobs: List[Optional[float]]
    top_logprobs: Optional[List[Dict[str, float]]]
    text_offset: Optional[List[int]]
```

### Streaming Types

```python { .api }
class CompletionChunk:
    id: str
    object: str
    created: int
    model: str
    choices: List[CompletionChoiceDelta]

class CompletionChoiceDelta:
    index: int
    text: str
    finish_reason: Optional[str]
    logprobs: Optional[Logprobs]
```