or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

core.md data.md distribute.md image.md index.md keras.md math.md nn.md saved-model.md

image.mddocs/

0

# Image Processing

1

2

Comprehensive image manipulation, transformation, and computer vision operations for preprocessing and augmentation. These operations provide the tools needed for image-based machine learning workflows.

3

4

## Capabilities

5

6

### Image Decoding and Encoding

7

8

Operations for reading and writing images in various formats.

9

10

```python { .api }

11

def decode_image(contents, channels=None, dtype=tf.uint8, name=None, expand_animations=True):

12

"""

13

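Convenience function for decode_bmp, decode_gif, decode_jpeg, and decode_png. Detects whether the input is a BMP, GIF, JPEG, or PNG and applies the matching decoder.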

14

15

Parameters:

16

- contents: 0-D. The encoded image bytes

17

- channels: An optional int. Defaults to None, which is treated as 0 (use the number of channels in the encoded image). Number of color channels for the decoded image

18

- dtype: The desired DType of the returned Tensor

19

- name: A name for the operation

20

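- expand_animations: An optional bool. Defaults to True. Controls the shape of the returned op's output. If True, GIFs decode to a 4-D tensor [num_frames, height, width, channels] while other formats decode to 3-D; if False, every format decodes to 3-D and animated GIFs are truncated to the first frame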

21

22

Returns:

23

Tensor with type dtype and a 3- or 4-dimensional shape

24

"""

25

26

def decode_jpeg(contents, channels=0, ratio=1, fancy_upsampling=True,

27

try_recover_truncated=False, acceptable_fraction=1,

28

dct_method="", name=None):

29

"""

30

Decode a JPEG-encoded image to a uint8 tensor.

31

32

Parameters:

33

- contents: A Tensor of type string. 0-D. The JPEG-encoded image

34

- channels: An optional int. Defaults to 0. Number of color channels for the decoded image

35

- ratio: An optional int. Defaults to 1. Downscaling ratio

36

- fancy_upsampling: An optional bool. Defaults to True. If true use a slower but nicer upsampling

37

- try_recover_truncated: An optional bool. Defaults to False. If true try to recover an image from truncated input

38

- acceptable_fraction: An optional float. Defaults to 1. The minimum required fraction of lines before a truncated input is accepted

39

- dct_method: An optional string. Defaults to "". string specifying a hint about the algorithm used for decompression

40

- name: A name for the operation

41

42

Returns:

43

A Tensor of type uint8

44

"""

45

46

def decode_png(contents, channels=0, dtype=tf.uint8, name=None):

47

"""

48

Decode a PNG-encoded image to a uint8 or uint16 tensor.

49

50

Parameters:

51

- contents: A Tensor of type string. 0-D. The PNG-encoded image

52

- channels: An optional int. Defaults to 0. Number of color channels for the decoded image

53

- dtype: An optional tf.DType from: tf.uint8, tf.uint16. Defaults to tf.uint8

54

- name: A name for the operation

55

56

Returns:

57

A Tensor of type dtype

58

"""

59

60

def encode_jpeg(image, format="", quality=95, progressive=False,

61

optimize_size=False, chroma_downsampling=True,

62

density_unit="in", x_density=300, y_density=300,

63

xmp_metadata="", name=None):

64

"""

65

JPEG-encode an image.

66

67

Parameters:

68

- image: A Tensor of type uint8. 3-D with shape [height, width, channels]

69

- format: An optional string from: "", "grayscale", "rgb". Defaults to ""

70

- quality: An optional int. Defaults to 95. Quality of the compression from 0 to 100

71

- progressive: An optional bool. Defaults to False. If True, create a JPEG that loads progressively

72

- optimize_size: An optional bool. Defaults to False. If True, spend CPU/RAM to reduce size with no quality change

73

- chroma_downsampling: An optional bool. Defaults to True. See http://en.wikipedia.org/wiki/Chroma_subsampling

74

- density_unit: An optional string from: "in", "cm". Defaults to "in". Unit used to specify x_density and y_density

75

- x_density: An optional int. Defaults to 300. Horizontal pixels per density unit

76

- y_density: An optional int. Defaults to 300. Vertical pixels per density unit

77

- xmp_metadata: An optional string. Defaults to "". If not empty, embed this XMP metadata in the image header

78

- name: A name for the operation

79

80

Returns:

81

A Tensor of type string

82

"""

83

84

def encode_png(image, compression=-1, name=None):

85

"""

86

PNG-encode an image.

87

88

Parameters:

89

- image: A Tensor. Must be one of the following types: uint8, uint16. 3-D with shape [height, width, channels]

90

- compression: An optional int. Defaults to -1. Compression level

91

- name: A name for the operation

92

93

Returns:

94

A Tensor of type string

95

"""

96

```

97

98

### Image Resizing and Cropping

99

100

Operations for resizing and cropping images.

101

102

```python { .api }

103

def resize(images, size, method=ResizeMethod.BILINEAR, preserve_aspect_ratio=False,

104

antialias=False, name=None):

105

"""

106

Resize images to size using the specified method.

107

108

Parameters:

109

- images: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels]

110

- size: A 1-D int32 Tensor of 2 elements: new_height, new_width

111

- method: An image.ResizeMethod, or string equivalent

112

- preserve_aspect_ratio: Whether to preserve the aspect ratio

113

- antialias: Whether to use an anti-aliasing filter when downsampling an image

114

- name: A name for this operation

115

116

Returns:

117

If images was 4-D, a 4-D float Tensor of shape [batch, new_height, new_width, channels]. If images was 3-D, a 3-D float Tensor of shape [new_height, new_width, channels]

118

"""

119

120

def resize_with_pad(image, target_height, target_width, method=ResizeMethod.BILINEAR, antialias=False):

121

"""

122

Resizes and pads an image to a target width and height.

123

124

Parameters:

125

- image: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels]

126

- target_height: Target height

127

- target_width: Target width

128

- method: An image.ResizeMethod, or string equivalent

129

- antialias: Whether to use an anti-aliasing filter when downsampling an image

130

131

Returns:

132

Resized and padded image

133

"""

134

135

def crop_to_bounding_box(image, offset_height, offset_width, target_height, target_width):

136

"""

137

Crops an image to a specified bounding box.

138

139

Parameters:

140

- image: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels]

141

- offset_height: Vertical coordinate of the top-left corner of the result in the input

142

- offset_width: Horizontal coordinate of the top-left corner of the result in the input

143

- target_height: Height of the result

144

- target_width: Width of the result

145

146

Returns:

147

Cropped image(s)

148

"""

149

150

def central_crop(image, central_fraction):

151

"""

152

Crop the central region of the image(s).

153

154

Parameters:

155

- image: Either a 3-D float Tensor of shape [height, width, depth], or a 4-D Tensor of shape [batch_size, height, width, depth]

156

- central_fraction: float (0, 1], fraction of size to crop

157

158

Returns:

159

3-D / 4-D float Tensor, as per the input

160

"""

161

162

def random_crop(value, size, seed=None, name=None):

163

"""

164

Randomly crops a tensor to a given size.

165

166

Parameters:

167

- value: Input tensor to crop

168

- size: 1-D tensor with size the rank of value

169

- seed: A Python integer. Used to create a random seed

170

- name: A name for this operation

171

172

Returns:

173

A cropped tensor of the same rank as value and shape size

174

"""

175

```

176

177

### Image Transformations

178

179

Geometric transformations and spatial manipulations.

180

181

```python { .api }

182

def flip_left_right(image):

183

"""

184

Flip an image horizontally (left to right).

185

186

Parameters:

187

- image: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels]

188

189

Returns:

190

A tensor of the same type and shape as image

191

"""

192

193

def flip_up_down(image):

194

"""

195

Flip an image vertically (upside down).

196

197

Parameters:

198

- image: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels]

199

200

Returns:

201

A tensor of the same type and shape as image

202

"""

203

204

def transpose(image, name=None):

205

"""

206

Transpose image(s) by swapping the height and width dimension.

207

208

Parameters:

209

- image: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels]

210

- name: A name for this operation

211

212

Returns:

213

A tensor of the same type as image, with the height and width dimensions swapped

214

"""

215

216

def rot90(image, k=1, name=None):

217

"""

218

Rotate image(s) counter-clockwise by 90 degrees.

219

220

Parameters:

221

- image: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels]

222

- k: A scalar integer tensor. The number of times the image is rotated by 90 degrees

223

- name: A name for this operation

224

225

Returns:

226

A rotated tensor of the same type and shape as image

227

"""

228

229

def random_flip_left_right(image, seed=None):

230

"""

231

Randomly flip an image horizontally (left to right).

232

233

Parameters:

234

- image: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels]

235

- seed: A Python integer. Used to create a random seed

236

237

Returns:

238

A tensor of the same type and shape as image

239

"""

240

241

def random_flip_up_down(image, seed=None):

242

"""

243

Randomly flips an image vertically (upside down).

244

245

Parameters:

246

- image: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels]

247

- seed: A Python integer. Used to create a random seed

248

249

Returns:

250

A tensor of the same type and shape as image

251

"""

252

```

253

254

### Color Space and Enhancement

255

256

Operations for color manipulation and image enhancement.

257

258

```python { .api }

259

def rgb_to_grayscale(images, name=None):

260

"""

261

Converts one or more images from RGB to Grayscale.

262

263

Parameters:

264

- images: The RGB tensor to convert. The last dimension must have size 3 and should contain RGB values

265

- name: A name for the operation

266

267

Returns:

268

The converted grayscale image(s)

269

"""

270

271

def grayscale_to_rgb(images, name=None):

272

"""

273

Converts one or more images from Grayscale to RGB.

274

275

Parameters:

276

- images: The Grayscale tensor to convert. Last dimension must be size 1

277

- name: A name for the operation

278

279

Returns:

280

The converted RGB image(s)

281

"""

282

283

def rgb_to_hsv(images, name=None):

284

"""

285

Converts one or more images from RGB to HSV.

286

287

Parameters:

288

- images: A Tensor. Must be one of the following types: half, bfloat16, float32, float64

289

- name: A name for the operation

290

291

Returns:

292

A Tensor. Has the same type as images

293

"""

294

295

def hsv_to_rgb(images, name=None):

296

"""

297

Converts one or more images from HSV to RGB.

298

299

Parameters:

300

- images: A Tensor. Must be one of the following types: half, bfloat16, float32, float64

301

- name: A name for the operation

302

303

Returns:

304

A Tensor. Has the same type as images

305

"""

306

307

def adjust_brightness(image, delta):

308

"""

309

Adjust the brightness of RGB or Grayscale images.

310

311

Parameters:

312

- image: RGB image or images to adjust

313

- delta: A scalar. Amount to add to the pixel values

314

315

Returns:

316

The brightness-adjusted image(s)

317

"""

318

319

def adjust_contrast(images, contrast_factor):

320

"""

321

Adjust contrast of RGB or grayscale images.

322

323

Parameters:

324

- images: Images to adjust. At least 3-D

325

- contrast_factor: A float multiplier for adjusting contrast

326

327

Returns:

328

The contrast-adjusted image or images

329

"""

330

331

def adjust_hue(image, delta, name=None):

332

"""

333

Adjust hue of RGB images.

334

335

Parameters:

336

- image: RGB image or images. The image hue is adjusted by converting the image(s) to HSV, rotating the hue channel (H) by delta, and converting back to RGB

337

- delta: float. How much to add to the hue channel

338

- name: A name for this operation

339

340

Returns:

341

The hue-adjusted image or images

342

"""

343

344

def adjust_saturation(image, saturation_factor, name=None):

345

"""

346

Adjust saturation of RGB images.

347

348

Parameters:

349

- image: RGB image or images. The image saturation is adjusted by converting the image to HSV, multiplying the saturation (S) channel by saturation_factor, and converting back to RGB

350

- saturation_factor: float. Factor to multiply the saturation by

351

- name: A name for this operation

352

353

Returns:

354

The saturation-adjusted image or images

355

"""

356

357

def random_brightness(image, max_delta, seed=None):

358

"""

359

Adjust the brightness of images by a random factor.

360

361

Parameters:

362

- image: An image or images to adjust

363

- max_delta: float, must be non-negative

364

- seed: A Python integer. Used to create a random seed

365

366

Returns:

367

The brightness-adjusted image(s)

368

"""

369

370

def random_contrast(image, lower, upper, seed=None):

371

"""

372

Adjust the contrast of an image or images by a random factor.

373

374

Parameters:

375

- image: An image tensor with 3 or more dimensions

376

- lower: float. Lower bound for the random contrast factor

377

- upper: float. Upper bound for the random contrast factor

378

- seed: A Python integer. Used to create a random seed

379

380

Returns:

381

The contrast-adjusted tensor

382

"""

383

```

384

385

### Image Quality and Metrics

386

387

Operations for measuring image quality and computing metrics.

388

389

```python { .api }

390

def psnr(a, b, max_val, name=None):

391

"""

392

Returns the Peak Signal-to-Noise Ratio between a and b.

393

394

Parameters:

395

- a: First set of images

396

- b: Second set of images

397

- max_val: The dynamic range of the images (i.e., the difference between the maximum and the minimum allowed values)

398

- name: Namespace to embed the computation in

399

400

Returns:

401

The scalar PSNR between a and b. The returned tensor has type tf.float32 and shape [batch_size, 1]

402

"""

403

404

def ssim(img1, img2, max_val, filter_size=11, filter_sigma=1.5, k1=0.01, k2=0.03):

405

"""

406

Computes SSIM index between img1 and img2.

407

408

Parameters:

409

- img1: First image batch

410

- img2: Second image batch

411

- max_val: The dynamic range of the images (i.e., the difference between the maximum and the minimum allowed values)

412

- filter_size: Default value 11 (size of gaussian filter)

413

- filter_sigma: Default value 1.5 (width of gaussian filter)

414

- k1: Default value 0.01

415

- k2: Default value 0.03 (SSIM is less sensitive to K2 for lower values, so it should be larger than K1)

416

417

Returns:

418

A tensor containing an SSIM value for each image in batch

419

"""

420

421

def total_variation(images, name=None):

422

"""

423

Calculate and return the total variation for one or more images.

424

425

Parameters:

426

- images: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels]

427

- name: A name for the operation

428

429

Returns:

430

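The total variation of images. If images was 4-D, a 1-D float Tensor of shape [batch] with the total variation of each image; if images was 3-D, a scalar float with the total variation of that image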

431

"""

432

```

433

434

## Usage Examples

435

436

```python

437

import tensorflow as tf

438

import numpy as np

439

440

# Read and decode images

441

image_string = tf.io.read_file('path/to/image.jpg')

442

image = tf.image.decode_jpeg(image_string, channels=3)

443

444

# Resize image

445

resized_image = tf.image.resize(image, [224, 224])

446

447

# Random augmentations

448

augmented_image = tf.image.random_flip_left_right(image)

449

augmented_image = tf.image.random_brightness(augmented_image, max_delta=0.1)

450

augmented_image = tf.image.random_contrast(augmented_image, lower=0.8, upper=1.2)

451

452

# Crop operations

453

central_cropped = tf.image.central_crop(image, central_fraction=0.8)

454

random_cropped = tf.image.random_crop(image, size=[100, 100, 3])

455

456

# Color space conversions

457

grayscale = tf.image.rgb_to_grayscale(image)

458

hsv_image = tf.image.rgb_to_hsv(image)

459

460

# Image processing pipeline for training

461

def preprocess_image(image_path, label):

462

image = tf.io.read_file(image_path)

463

image = tf.image.decode_jpeg(image, channels=3)

464

image = tf.image.resize(image, [224, 224])

465

image = tf.cast(image, tf.float32) / 255.0

466

467

# Data augmentation

468

image = tf.image.random_flip_left_right(image)

469

image = tf.image.random_brightness(image, max_delta=0.1)

470

image = tf.image.random_contrast(image, lower=0.9, upper=1.1)

471

472

return image, label

473

474

# Batch processing

475

batch_size = 32

476

image_paths = ["path1.jpg", "path2.jpg", ...] # List of image paths

477

labels = [0, 1, ...] # Corresponding labels

478

479

dataset = tf.data.Dataset.from_tensor_slices((image_paths, labels))

480

dataset = dataset.map(preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)

481

dataset = dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)

482

483

# Quality metrics

484

img1 = tf.random.uniform([1, 256, 256, 3])

485

img2 = tf.random.uniform([1, 256, 256, 3])

486

487

psnr_value = tf.image.psnr(img1, img2, max_val=1.0)

488

ssim_value = tf.image.ssim(img1, img2, max_val=1.0)

489

490

print(f"PSNR: {psnr_value.numpy()}")

491

print(f"SSIM: {ssim_value.numpy()}")

492

```