or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

audio-models.md evaluation-metrics.md generative-models.md image-models.md index.md layers-components.md multimodal-models.md text-generation-sampling.md text-models.md tokenizers.md utilities-helpers.md

docs/generative-models.md

0

# Generative Models

1

2

Advanced generative models for text-to-image synthesis, image-to-image transformation, and image inpainting. Keras Hub provides implementations of state-of-the-art diffusion models including Stable Diffusion 3 and Flux.

3

4

## Capabilities

5

6

### Base Classes

7

8

Foundation classes for different types of generative models.

9

10

```python { .api }

11

class TextToImage(Task):

12

"""Base class for text-to-image generation models."""

13

def __init__(

14

self,

15

backbone: Backbone,

16

preprocessor: Preprocessor = None,

17

**kwargs

18

): ...

19

20

def generate(

21

self,

22

inputs,

23

num_steps: int = 50,

24

guidance_scale: float = 7.5,

25

**kwargs

26

): ...

27

28

class ImageToImage(Task):

29

"""Base class for image-to-image generation models."""

30

def __init__(

31

self,

32

backbone: Backbone,

33

preprocessor: Preprocessor = None,

34

**kwargs

35

): ...

36

37

class Inpaint(Task):

38

"""Base class for image inpainting models."""

39

def __init__(

40

self,

41

backbone: Backbone,

42

preprocessor: Preprocessor = None,

43

**kwargs

44

): ...

45

```

46

47

### Stable Diffusion 3

48

49

Stable Diffusion 3 is an advanced text-to-image diffusion model with improved quality and prompt adherence.

50

51

```python { .api }

52

class StableDiffusion3Backbone(Backbone):

53

"""Stable Diffusion 3 backbone architecture."""

54

def __init__(

55

self,

56

height: int = 1024,

57

width: int = 1024,

58

num_train_timesteps: int = 1000,

59

shift: float = 3.0,

60

**kwargs

61

): ...

62

63

class StableDiffusion3TextToImage(TextToImage):

64

"""Stable Diffusion 3 model for text-to-image generation."""

65

def __init__(

66

self,

67

backbone: StableDiffusion3Backbone,

68

preprocessor: Preprocessor = None,

69

**kwargs

70

): ...

71

72

class StableDiffusion3ImageToImage(ImageToImage):

73

"""Stable Diffusion 3 model for image-to-image generation."""

74

def __init__(

75

self,

76

backbone: StableDiffusion3Backbone,

77

preprocessor: Preprocessor = None,

78

**kwargs

79

): ...

80

81

class StableDiffusion3Inpaint(Inpaint):

82

"""Stable Diffusion 3 model for image inpainting."""

83

def __init__(

84

self,

85

backbone: StableDiffusion3Backbone,

86

preprocessor: Preprocessor = None,

87

**kwargs

88

): ...

89

90

class StableDiffusion3TextToImagePreprocessor:

91

"""Preprocessor for Stable Diffusion 3 text-to-image generation."""

92

def __init__(

93

self,

94

height: int = 1024,

95

width: int = 1024,

96

**kwargs

97

): ...

98

```

99

100

### Flux

101

102

Flux is a high-quality text-to-image diffusion model with excellent prompt following capabilities.

103

104

```python { .api }

105

class FluxBackbone(Backbone):

106

"""Flux diffusion model backbone."""

107

def __init__(

108

self,

109

height: int = 1024,

110

width: int = 1024,

111

max_sequence_length: int = 512,

112

**kwargs

113

): ...

114

115

class FluxTextToImage(TextToImage):

116

"""Flux model for text-to-image generation."""

117

def __init__(

118

self,

119

backbone: FluxBackbone,

120

preprocessor: Preprocessor = None,

121

**kwargs

122

): ...

123

124

class FluxTextToImagePreprocessor:

125

"""Preprocessor for Flux text-to-image generation."""

126

def __init__(

127

self,

128

height: int = 1024,

129

width: int = 1024,

130

max_sequence_length: int = 512,

131

**kwargs

132

): ...

133

```

134

135

### Preprocessor Base Classes

136

137

Base classes for generative model preprocessing.

138

139

```python { .api }

140

class TextToImagePreprocessor(Preprocessor):

141

"""Base preprocessor for text-to-image models."""

142

def __init__(

143

self,

144

height: int = 512,

145

width: int = 512,

146

**kwargs

147

): ...

148

```

149

150

## Usage Examples

151

152

### Text-to-Image Generation with Stable Diffusion 3

153

154

```python

155

import keras_hub

156

157

# Load pretrained Stable Diffusion 3 model

158

model = keras_hub.models.StableDiffusion3TextToImage.from_preset("stable_diffusion_3_medium")

159

160

# Generate image from text

161

prompt = "A serene landscape with mountains and a lake at sunset"

162

generated_image = model.generate(

163

prompt,

164

num_steps=50,

165

guidance_scale=7.5,

166

height=1024,

167

width=1024

168

)

169

170

print(f"Generated image shape: {generated_image.shape}")

171

# Save or display the generated image

172

```

173

174

### Image-to-Image with Stable Diffusion 3

175

176

```python

177

import keras_hub

178

import numpy as np

179

180

# Load image-to-image model

181

model = keras_hub.models.StableDiffusion3ImageToImage.from_preset("stable_diffusion_3_medium")

182

183

# Prepare input image and prompt

184

input_image = np.random.random((1024, 1024, 3)) # Example input image

185

prompt = "Transform this into a painting in the style of Van Gogh"

186

187

# Generate transformed image

188

generated_image = model.generate(

189

[input_image, prompt],

190

num_steps=50,

191

guidance_scale=7.5,

192

strength=0.8 # How much to transform the input image

193

)

194

195

print(f"Transformed image shape: {generated_image.shape}")

196

```

197

198

### Image Inpainting with Stable Diffusion 3

199

200

```python

201

import keras_hub

202

import numpy as np

203

204

# Load inpainting model

205

model = keras_hub.models.StableDiffusion3Inpaint.from_preset("stable_diffusion_3_medium")

206

207

# Prepare input image, mask, and prompt

208

input_image = np.random.random((1024, 1024, 3)) # Example input image

209

mask = np.zeros((1024, 1024, 1)) # Mask where 1 indicates areas to inpaint

210

mask[400:600, 400:600] = 1 # Square region to inpaint

211

prompt = "A beautiful flower in the center"

212

213

# Generate inpainted image

214

inpainted_image = model.generate(

215

[input_image, mask, prompt],

216

num_steps=50,

217

guidance_scale=7.5

218

)

219

220

print(f"Inpainted image shape: {inpainted_image.shape}")

221

```

222

223

### Text-to-Image with Flux

224

225

```python

226

import keras_hub

227

228

# Load Flux model

229

model = keras_hub.models.FluxTextToImage.from_preset("flux_1_dev")

230

231

# Generate high-quality image

232

prompt = "A photorealistic portrait of a wise old wizard with a long white beard, wearing a pointed hat, in a mystical forest setting"

233

234

generated_image = model.generate(

235

prompt,

236

num_steps=50,

237

guidance_scale=3.5, # Flux typically uses lower guidance scales

238

height=1024,

239

width=1024

240

)

241

242

print(f"Generated image shape: {generated_image.shape}")

243

```

244

245

### Batch Generation

246

247

```python

248

import keras_hub

249

250

# Load model

251

model = keras_hub.models.StableDiffusion3TextToImage.from_preset("stable_diffusion_3_medium")

252

253

# Generate multiple images from different prompts

254

prompts = [

255

"A cat sitting on a windowsill",

256

"A futuristic cityscape at night",

257

"A peaceful garden with flowers"

258

]

259

260

# Generate batch of images

261

generated_images = model.generate(

262

prompts,

263

num_steps=50,

264

guidance_scale=7.5,

265

height=512,

266

width=512

267

)

268

269

print(f"Generated batch shape: {generated_images.shape}")

270

# Shape will be (3, 512, 512, 3) for 3 images

271

```

272

273

### Custom Generation Pipeline

274

275

```python

276

import keras_hub

277

278

# Create custom backbone

279

backbone = keras_hub.models.StableDiffusion3Backbone(

280

height=768,

281

width=768,

282

num_train_timesteps=1000

283

)

284

285

# Create preprocessor

286

preprocessor = keras_hub.models.StableDiffusion3TextToImagePreprocessor(

287

height=768,

288

width=768

289

)

290

291

# Create custom model

292

model = keras_hub.models.StableDiffusion3TextToImage(

293

backbone=backbone,

294

preprocessor=preprocessor

295

)

296

297

# Use for generation

298

generated_image = model.generate(

299

"A custom prompt",

300

num_steps=30,

301

guidance_scale=8.0

302

)

303

```

304

305

### Controlling Generation Parameters

306

307

```python

308

import keras_hub

309

310

# Load model

311

model = keras_hub.models.StableDiffusion3TextToImage.from_preset("stable_diffusion_3_medium")

312

313

# Fine-tune generation parameters

314

generated_image = model.generate(

315

"A detailed digital artwork of a dragon",

316

num_steps=75, # More steps for higher quality

317

guidance_scale=10.0, # Higher guidance for stronger prompt adherence

318

height=1024,

319

width=1024,

320

seed=42 # Set seed for reproducible results

321

)

322

323

print("Generated image with custom parameters")

324

```