0
# Image Processing
1
2
Comprehensive image manipulation, transformation, and computer vision operations for preprocessing and augmentation. These operations provide the tools needed for image-based machine learning workflows.
3
4
## Capabilities
5
6
### Image Decoding and Encoding
7
8
Operations for reading and writing images in various formats.
9
10
```python { .api }
11
def decode_image(contents, channels=None, dtype=tf.uint8, name=None, expand_animations=True):
12
"""
13
Detect whether the encoded image is a BMP, GIF, JPEG, or PNG and decode it with the matching decoder (decode_bmp, decode_gif, decode_jpeg, or decode_png).
14
15
Parameters:
16
- contents: 0-D. The encoded image bytes
17
- channels: An optional int. Defaults to None, which is treated as 0 (use the number of channels in the encoded image). Number of color channels for the decoded image
18
- dtype: The desired DType of the returned Tensor
19
- name: A name for the operation
20
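- expand_animations: An optional bool. Defaults to True. Controls the shape of the returned op's output: if True, GIFs decode to a 4-D tensor [num_frames, height, width, channels] and other formats to 3-D; if False, every format decodes to a 3-D tensor and animated GIFs are truncated to the first frame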
21
22
Returns:
23
Tensor with type dtype and a 3- or 4-dimensional shape
24
"""
25
26
def decode_jpeg(contents, channels=0, ratio=1, fancy_upsampling=True,
27
try_recover_truncated=False, acceptable_fraction=1,
28
dct_method="", name=None):
29
"""
30
Decode a JPEG-encoded image to a uint8 tensor.
31
32
Parameters:
33
- contents: A Tensor of type string. 0-D. The JPEG-encoded image
34
- channels: An optional int. Defaults to 0. Number of color channels for the decoded image
35
- ratio: An optional int. Defaults to 1. Downscaling ratio
36
- fancy_upsampling: An optional bool. Defaults to True. If true use a slower but nicer upsampling
37
- try_recover_truncated: An optional bool. Defaults to False. If true try to recover an image from truncated input
38
- acceptable_fraction: An optional float. Defaults to 1. The minimum required fraction of lines before a truncated input is accepted
39
- dct_method: An optional string. Defaults to "". string specifying a hint about the algorithm used for decompression
40
- name: A name for the operation
41
42
Returns:
43
A Tensor of type uint8
44
"""
45
46
def decode_png(contents, channels=0, dtype=tf.uint8, name=None):
47
"""
48
Decode a PNG-encoded image to a uint8 or uint16 tensor.
49
50
Parameters:
51
- contents: A Tensor of type string. 0-D. The PNG-encoded image
52
- channels: An optional int. Defaults to 0. Number of color channels for the decoded image
53
- dtype: An optional tf.DType from: tf.uint8, tf.uint16. Defaults to tf.uint8
54
- name: A name for the operation
55
56
Returns:
57
A Tensor of type dtype
58
"""
59
60
def encode_jpeg(image, format="", quality=95, progressive=False,
61
optimize_size=False, chroma_downsampling=True,
62
density_unit="in", x_density=300, y_density=300,
63
xmp_metadata="", name=None):
64
"""
65
JPEG-encode an image.
66
67
Parameters:
68
- image: A Tensor of type uint8. 3-D with shape [height, width, channels]
69
- format: An optional string from: "", "grayscale", "rgb". Defaults to ""
70
- quality: An optional int. Defaults to 95. Quality of the compression from 0 to 100
71
- progressive: An optional bool. Defaults to False. If True, create a JPEG that loads progressively
72
- optimize_size: An optional bool. Defaults to False. If True, spend CPU/RAM to reduce size with no quality change
73
- chroma_downsampling: An optional bool. Defaults to True. See http://en.wikipedia.org/wiki/Chroma_subsampling
74
- density_unit: An optional string from: "in", "cm". Defaults to "in". Unit used to specify x_density and y_density
75
- x_density: An optional int. Defaults to 300. Horizontal pixels per density unit
76
- y_density: An optional int. Defaults to 300. Vertical pixels per density unit
77
- xmp_metadata: An optional string. Defaults to "". If not empty, embed this XMP metadata in the image header
78
- name: A name for the operation
79
80
Returns:
81
A Tensor of type string
82
"""
83
84
def encode_png(image, compression=-1, name=None):
85
"""
86
PNG-encode an image.
87
88
Parameters:
89
- image: A Tensor. Must be one of the following types: uint8, uint16. 3-D with shape [height, width, channels]
90
- compression: An optional int. Defaults to -1. Compression level
91
- name: A name for the operation
92
93
Returns:
94
A Tensor of type string
95
"""
96
```
97
98
### Image Resizing and Cropping
99
100
Operations for resizing and cropping images.
101
102
```python { .api }
103
def resize(images, size, method=ResizeMethod.BILINEAR, preserve_aspect_ratio=False,
104
antialias=False, name=None):
105
"""
106
Resize images to size using the specified method.
107
108
Parameters:
109
- images: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels]
110
- size: A 1-D int32 Tensor of 2 elements: new_height, new_width
111
- method: An image.ResizeMethod, or string equivalent
112
- preserve_aspect_ratio: Whether to preserve the aspect ratio
113
- antialias: Whether to use an anti-aliasing filter when downsampling an image
114
- name: A name for this operation
115
116
Returns:
117
If images was 4-D, a 4-D float Tensor of shape [batch, new_height, new_width, channels]. If images was 3-D, a 3-D float Tensor of shape [new_height, new_width, channels]
118
"""
119
120
def resize_with_pad(image, target_height, target_width, method=ResizeMethod.BILINEAR, antialias=False):
121
"""
122
Resizes and pads an image to a target width and height.
123
124
Parameters:
125
- image: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels]
126
- target_height: Target height
127
- target_width: Target width
128
- method: An image.ResizeMethod, or string equivalent
129
- antialias: Whether to use an anti-aliasing filter when downsampling an image
130
131
Returns:
132
Resized and padded image
133
"""
134
135
def crop_to_bounding_box(image, offset_height, offset_width, target_height, target_width):
136
"""
137
Crops an image to a specified bounding box.
138
139
Parameters:
140
- image: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels]
141
- offset_height: Vertical coordinate of the top-left corner of the result in the input
142
- offset_width: Horizontal coordinate of the top-left corner of the result in the input
143
- target_height: Height of the result
144
- target_width: Width of the result
145
146
Returns:
147
Cropped image(s)
148
"""
149
150
def central_crop(image, central_fraction):
151
"""
152
Crop the central region of the image(s).
153
154
Parameters:
155
- image: Either a 3-D float Tensor of shape [height, width, depth], or a 4-D Tensor of shape [batch_size, height, width, depth]
156
- central_fraction: float in the range (0, 1]. Fraction of the height and width to retain around the image center (1.0 returns the image unchanged)
157
158
Returns:
159
3-D / 4-D float Tensor, as per the input
160
"""
161
162
def random_crop(value, size, seed=None, name=None):
163
"""
164
Randomly crops a tensor to a given size.
165
166
Parameters:
167
- value: Input tensor to crop
168
- size: 1-D tensor with size the rank of value
169
- seed: A Python integer. Used to create a random seed
170
- name: A name for this operation
171
172
Returns:
173
A cropped tensor of the same rank as value and shape size
174
"""
175
```
176
177
### Image Transformations
178
179
Geometric transformations and spatial manipulations.
180
181
```python { .api }
182
def flip_left_right(image):
183
"""
184
Flip an image horizontally (left to right).
185
186
Parameters:
187
- image: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels]
188
189
Returns:
190
A tensor of the same type and shape as image
191
"""
192
193
def flip_up_down(image):
194
"""
195
Flip an image vertically (upside down).
196
197
Parameters:
198
- image: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels]
199
200
Returns:
201
A tensor of the same type and shape as image
202
"""
203
204
def transpose(image, name=None):
205
"""
206
Transpose image(s) by swapping the height and width dimension.
207
208
Parameters:
209
- image: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels]
210
- name: A name for this operation
211
212
Returns:
213
A tensor of the same type as image, with the height and width dimensions swapped
214
"""
215
216
def rot90(image, k=1, name=None):
217
"""
218
Rotate image(s) counter-clockwise by 90 degrees.
219
220
Parameters:
221
- image: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels]
222
- k: A scalar integer tensor. The number of times the image is rotated by 90 degrees
223
- name: A name for this operation
224
225
Returns:
226
A rotated tensor of the same type as image; the height and width dimensions are swapped when k is odd
227
"""
228
229
def random_flip_left_right(image, seed=None):
230
"""
231
Randomly flip an image horizontally (left to right).
232
233
Parameters:
234
- image: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels]
235
- seed: A Python integer. Used to create a random seed
236
237
Returns:
238
A tensor of the same type and shape as image
239
"""
240
241
def random_flip_up_down(image, seed=None):
242
"""
243
Randomly flips an image vertically (upside down).
244
245
Parameters:
246
- image: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels]
247
- seed: A Python integer. Used to create a random seed
248
249
Returns:
250
A tensor of the same type and shape as image
251
"""
252
```
253
254
### Color Space and Enhancement
255
256
Operations for color manipulation and image enhancement.
257
258
```python { .api }
259
def rgb_to_grayscale(images, name=None):
260
"""
261
Converts one or more images from RGB to Grayscale.
262
263
Parameters:
264
- images: The RGB tensor to convert. The last dimension must have size 3 and should contain RGB values
265
- name: A name for the operation
266
267
Returns:
268
The converted grayscale image(s)
269
"""
270
271
def grayscale_to_rgb(images, name=None):
272
"""
273
Converts one or more images from Grayscale to RGB.
274
275
Parameters:
276
- images: The Grayscale tensor to convert. Last dimension must be size 1
277
- name: A name for the operation
278
279
Returns:
280
The converted RGB image(s)
281
"""
282
283
def rgb_to_hsv(images, name=None):
284
"""
285
Converts one or more images from RGB to HSV.
286
287
Parameters:
288
- images: A Tensor. Must be one of the following types: half, bfloat16, float32, float64
289
- name: A name for the operation
290
291
Returns:
292
A Tensor. Has the same type as images
293
"""
294
295
def hsv_to_rgb(images, name=None):
296
"""
297
Converts one or more images from HSV to RGB.
298
299
Parameters:
300
- images: A Tensor. Must be one of the following types: half, bfloat16, float32, float64
301
- name: A name for the operation
302
303
Returns:
304
A Tensor. Has the same type as images
305
"""
306
307
def adjust_brightness(image, delta):
308
"""
309
Adjust the brightness of RGB or Grayscale images.
310
311
Parameters:
312
- image: RGB image or images to adjust
313
- delta: A scalar. Amount to add to the pixel values
314
315
Returns:
316
The brightness-adjusted image(s)
317
"""
318
319
def adjust_contrast(images, contrast_factor):
320
"""
321
Adjust contrast of RGB or grayscale images.
322
323
Parameters:
324
- images: Images to adjust. At least 3-D
325
- contrast_factor: A float multiplier for adjusting contrast
326
327
Returns:
328
The contrast-adjusted image or images
329
"""
330
331
def adjust_hue(image, delta, name=None):
332
"""
333
Adjust hue of RGB images.
334
335
Parameters:
336
- image: RGB image or images. The image hue is adjusted by converting the image(s) to HSV and rotating the hue channel (H)
337
- delta: float. How much to add to the hue channel. Must be in the interval [-1, 1]
338
- name: A name for this operation
339
340
Returns:
341
The hue-adjusted image or images
342
"""
343
344
def adjust_saturation(image, saturation_factor, name=None):
345
"""
346
Adjust saturation of RGB images.
347
348
Parameters:
349
- image: RGB image or images. The image saturation is adjusted by converting the image to HSV and multiplying the saturation (S)
350
- saturation_factor: float. Factor to multiply the saturation by
351
- name: A name for this operation
352
353
Returns:
354
The saturation-adjusted image or images
355
"""
356
357
def random_brightness(image, max_delta, seed=None):
358
"""
359
Adjust the brightness of images by a random factor.
360
361
Parameters:
362
- image: An image or images to adjust
363
- max_delta: float, must be non-negative
364
- seed: A Python integer. Used to create a random seed
365
366
Returns:
367
The brightness-adjusted image(s)
368
"""
369
370
def random_contrast(image, lower, upper, seed=None):
371
"""
372
Adjust the contrast of an image or images by a random factor.
373
374
Parameters:
375
- image: An image tensor with 3 or more dimensions
376
- lower: float. Lower bound for the random contrast factor
377
- upper: float. Upper bound for the random contrast factor
378
- seed: A Python integer. Used to create a random seed
379
380
Returns:
381
The contrast-adjusted tensor
382
"""
383
```
384
385
### Image Quality and Metrics
386
387
Operations for measuring image quality and computing metrics.
388
389
```python { .api }
390
def psnr(a, b, max_val, name=None):
391
"""
392
Returns the Peak Signal-to-Noise Ratio between a and b.
393
394
Parameters:
395
- a: First set of images
396
- b: Second set of images
397
- max_val: The dynamic range of the images (i.e., the difference between the maximum and the minimum allowed values)
398
- name: Namespace to embed the computation in
399
400
Returns:
401
The scalar PSNR between a and b. The returned tensor has type tf.float32 and shape [batch_size, 1]
402
"""
403
404
def ssim(img1, img2, max_val, filter_size=11, filter_sigma=1.5, k1=0.01, k2=0.03):
405
"""
406
Computes SSIM index between img1 and img2.
407
408
Parameters:
409
- img1: First image batch
410
- img2: Second image batch
411
- max_val: The dynamic range of the images (i.e., the difference between the maximum and the minimum allowed values)
412
- filter_size: Default value 11 (size of gaussian filter)
413
- filter_sigma: Default value 1.5 (width of gaussian filter)
414
- k1: Default value 0.01
415
- k2: Default value 0.03 (SSIM is less sensitive to K2 for lower values, so it should be larger than K1)
416
417
Returns:
418
A tensor containing an SSIM value for each image in batch
419
"""
420
421
def total_variation(images, name=None):
422
"""
423
Calculate and return the total variation for one or more images.
424
425
Parameters:
426
- images: A Tensor. Must be one of the following types: half, float32, float64
427
- name: A name for the operation
428
429
Returns:
430
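A Tensor with the total variation. Has the same type as images. It is a scalar if images was 3-D, or a 1-D Tensor of shape [batch] (one value per image) if images was 4-D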
431
"""
432
```
433
434
## Usage Examples
435
436
```python
437
import tensorflow as tf
438
import numpy as np
439
440
# Read and decode images
441
image_string = tf.io.read_file('path/to/image.jpg')
442
image = tf.image.decode_jpeg(image_string, channels=3)
443
444
# Resize image
445
resized_image = tf.image.resize(image, [224, 224])
446
447
# Random augmentations
448
augmented_image = tf.image.random_flip_left_right(image)
449
augmented_image = tf.image.random_brightness(augmented_image, max_delta=0.1)
450
augmented_image = tf.image.random_contrast(augmented_image, lower=0.8, upper=1.2)
451
452
# Crop operations
453
central_cropped = tf.image.central_crop(image, central_fraction=0.8)
454
random_cropped = tf.image.random_crop(image, size=[100, 100, 3])
455
456
# Color space conversions
457
grayscale = tf.image.rgb_to_grayscale(image)
458
hsv_image = tf.image.rgb_to_hsv(tf.image.convert_image_dtype(image, tf.float32))  # rgb_to_hsv requires a float input in [0, 1]
459
460
# Image processing pipeline for training
461
def preprocess_image(image_path, label):
462
image = tf.io.read_file(image_path)
463
image = tf.image.decode_jpeg(image, channels=3)
464
image = tf.image.resize(image, [224, 224])
465
image = tf.cast(image, tf.float32) / 255.0
466
467
# Data augmentation
468
image = tf.image.random_flip_left_right(image)
469
image = tf.image.random_brightness(image, max_delta=0.1)
470
image = tf.image.random_contrast(image, lower=0.9, upper=1.1)
471
472
return image, label
473
474
# Batch processing
475
batch_size = 32
476
image_paths = ["path1.jpg", "path2.jpg", ...] # List of image paths
477
labels = [0, 1, ...] # Corresponding labels
478
479
dataset = tf.data.Dataset.from_tensor_slices((image_paths, labels))
480
dataset = dataset.map(preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)
481
dataset = dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)
482
483
# Quality metrics
484
img1 = tf.random.uniform([1, 256, 256, 3])
485
img2 = tf.random.uniform([1, 256, 256, 3])
486
487
psnr_value = tf.image.psnr(img1, img2, max_val=1.0)
488
ssim_value = tf.image.ssim(img1, img2, max_val=1.0)
489
490
print(f"PSNR: {psnr_value.numpy()}")
491
print(f"SSIM: {ssim_value.numpy()}")
492
```