An end-to-end open source platform for machine learning
—
Comprehensive image manipulation, transformation, and computer vision operations for preprocessing and augmentation. These operations provide the tools needed for image-based machine learning workflows.
Operations for reading and writing images in various formats.
def decode_image(contents, channels=None, dtype=tf.uint8, name=None, expand_animations=True):
"""
Function for decode_bmp, decode_gif, decode_jpeg, and decode_png.
Parameters:
- contents: 0-D. The encoded image bytes
- channels: An optional int. Defaults to None, which is treated as 0 (use the number of channels stored in the encoded image). Number of color channels for the decoded image
- dtype: The desired DType of the returned Tensor
- name: A name for the operation
- expand_animations: Controls the shape of the returned op's output
Returns:
Tensor with type dtype and a 3- or 4-dimensional shape
"""
def decode_jpeg(contents, channels=0, ratio=1, fancy_upsampling=True,
try_recover_truncated=False, acceptable_fraction=1,
dct_method="", name=None):
"""
Decode a JPEG-encoded image to a uint8 tensor.
Parameters:
- contents: A Tensor of type string. 0-D. The JPEG-encoded image
- channels: An optional int. Defaults to 0. Number of color channels for the decoded image
- ratio: An optional int. Defaults to 1. Downscaling ratio
- fancy_upsampling: An optional bool. Defaults to True. If true use a slower but nicer upsampling
- try_recover_truncated: An optional bool. Defaults to False. If true try to recover an image from truncated input
- acceptable_fraction: An optional float. Defaults to 1. The minimum required fraction of lines before a truncated input is accepted
- dct_method: An optional string. Defaults to "". string specifying a hint about the algorithm used for decompression
- name: A name for the operation
Returns:
A Tensor of type uint8
"""
def decode_png(contents, channels=0, dtype=tf.uint8, name=None):
"""
Decode a PNG-encoded image to a uint8 or uint16 tensor.
Parameters:
- contents: A Tensor of type string. 0-D. The PNG-encoded image
- channels: An optional int. Defaults to 0. Number of color channels for the decoded image
- dtype: An optional tf.DType from: tf.uint8, tf.uint16. Defaults to tf.uint8
- name: A name for the operation
Returns:
A Tensor of type dtype
"""
def encode_jpeg(image, format="", quality=95, progressive=False,
optimize_size=False, chroma_downsampling=True,
density_unit="in", x_density=300, y_density=300,
xmp_metadata="", name=None):
"""
JPEG-encode an image.
Parameters:
- image: A Tensor of type uint8. 3-D with shape [height, width, channels]
- format: An optional string from: "", "grayscale", "rgb". Defaults to ""
- quality: An optional int. Defaults to 95. Quality of the compression from 0 to 100
- progressive: An optional bool. Defaults to False. If True, create a JPEG that loads progressively
- optimize_size: An optional bool. Defaults to False. If True, spend CPU/RAM to reduce size with no quality change
- chroma_downsampling: An optional bool. Defaults to True. See http://en.wikipedia.org/wiki/Chroma_subsampling
- density_unit: An optional string from: "in", "cm". Defaults to "in". Unit used to specify x_density and y_density
- x_density: An optional int. Defaults to 300. Horizontal pixels per density unit
- y_density: An optional int. Defaults to 300. Vertical pixels per density unit
- xmp_metadata: An optional string. Defaults to "". If not empty, embed this XMP metadata in the image header
- name: A name for the operation
Returns:
A Tensor of type string
"""
def encode_png(image, compression=-1, name=None):
"""
PNG-encode an image.
Parameters:
- image: A Tensor. Must be one of the following types: uint8, uint16. 3-D with shape [height, width, channels]
- compression: An optional int. Defaults to -1. Compression level
- name: A name for the operation
Returns:
A Tensor of type string
"""Operations for resizing and cropping images.
def resize(images, size, method=ResizeMethod.BILINEAR, preserve_aspect_ratio=False,
antialias=False, name=None):
"""
Resize images to size using the specified method.
Parameters:
- images: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels]
- size: A 1-D int32 Tensor of 2 elements: new_height, new_width
- method: An image.ResizeMethod, or string equivalent
- preserve_aspect_ratio: Whether to preserve the aspect ratio
- antialias: Whether to use an anti-aliasing filter when downsampling an image
- name: A name for this operation
Returns:
If images was 4-D, a 4-D float Tensor of shape [batch, new_height, new_width, channels]. If images was 3-D, a 3-D float Tensor of shape [new_height, new_width, channels]
"""
def resize_with_pad(image, target_height, target_width, method=ResizeMethod.BILINEAR, antialias=False):
"""
Resizes and pads an image to a target width and height.
Parameters:
- image: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels]
- target_height: Target height
- target_width: Target width
- method: An image.ResizeMethod, or string equivalent
- antialias: Whether to use an anti-aliasing filter when downsampling an image
Returns:
Resized and padded image
"""
def crop_to_bounding_box(image, offset_height, offset_width, target_height, target_width):
"""
Crops an image to a specified bounding box.
Parameters:
- image: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels]
- offset_height: Vertical coordinate of the top-left corner of the result in the input
- offset_width: Horizontal coordinate of the top-left corner of the result in the input
- target_height: Height of the result
- target_width: Width of the result
Returns:
Cropped image(s)
"""
def central_crop(image, central_fraction):
"""
Crop the central region of the image(s).
Parameters:
- image: Either a 3-D float Tensor of shape [height, width, depth], or a 4-D Tensor of shape [batch_size, height, width, depth]
- central_fraction: float (0, 1], fraction of size to crop
Returns:
3-D / 4-D float Tensor, as per the input
"""
def random_crop(value, size, seed=None, name=None):
"""
Randomly crops a tensor to a given size.
Parameters:
- value: Input tensor to crop
- size: 1-D tensor with size the rank of value
- seed: A Python integer. Used to create a random seed
- name: A name for this operation
Returns:
A cropped tensor of the same rank as value and shape size
"""Geometric transformations and spatial manipulations.
def flip_left_right(image):
"""
Flip an image horizontally (left to right).
Parameters:
- image: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels]
Returns:
A tensor of the same type and shape as image
"""
def flip_up_down(image):
"""
Flip an image vertically (upside down).
Parameters:
- image: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels]
Returns:
A tensor of the same type and shape as image
"""
def transpose(image, name=None):
"""
Transpose image(s) by swapping the height and width dimension.
Parameters:
- image: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels]
- name: A name for this operation
Returns:
A tensor of the same type and shape as image, transposed
"""
def rot90(image, k=1, name=None):
"""
Rotate image(s) counter-clockwise by 90 degrees.
Parameters:
- image: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels]
- k: A scalar integer tensor. The number of times the image is rotated by 90 degrees
- name: A name for this operation
Returns:
A rotated tensor of the same type and shape as image
"""
def random_flip_left_right(image, seed=None):
"""
Randomly flip an image horizontally (left to right).
Parameters:
- image: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels]
- seed: A Python integer. Used to create a random seed
Returns:
A tensor of the same type and shape as image
"""
def random_flip_up_down(image, seed=None):
"""
Randomly flips an image vertically (upside down).
Parameters:
- image: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels]
- seed: A Python integer. Used to create a random seed
Returns:
A tensor of the same type and shape as image
"""Operations for color manipulation and image enhancement.
def rgb_to_grayscale(images, name=None):
"""
Converts one or more images from RGB to Grayscale.
Parameters:
- images: The RGB tensor to convert. The last dimension must have size 3 and should contain RGB values
- name: A name for the operation
Returns:
The converted grayscale image(s)
"""
def grayscale_to_rgb(images, name=None):
"""
Converts one or more images from Grayscale to RGB.
Parameters:
- images: The Grayscale tensor to convert. Last dimension must be size 1
- name: A name for the operation
Returns:
The converted RGB image(s)
"""
def rgb_to_hsv(images, name=None):
"""
Converts one or more images from RGB to HSV.
Parameters:
- images: A Tensor. Must be one of the following types: half, bfloat16, float32, float64
- name: A name for the operation
Returns:
A Tensor. Has the same type as images
"""
def hsv_to_rgb(images, name=None):
"""
Converts one or more images from HSV to RGB.
Parameters:
- images: A Tensor. Must be one of the following types: half, bfloat16, float32, float64
- name: A name for the operation
Returns:
A Tensor. Has the same type as images
"""
def adjust_brightness(image, delta):
"""
Adjust the brightness of RGB or Grayscale images.
Parameters:
- image: RGB image or images to adjust
- delta: A scalar. Amount to add to the pixel values
Returns:
The brightness-adjusted image(s)
"""
def adjust_contrast(images, contrast_factor):
"""
Adjust contrast of RGB or grayscale images.
Parameters:
- images: Images to adjust. At least 3-D
- contrast_factor: A float multiplier for adjusting contrast
Returns:
The contrast-adjusted image or images
"""
def adjust_hue(image, delta, name=None):
"""
Adjust hue of RGB images.
Parameters:
- image: RGB image or images. The image hue is adjusted by converting the image(s) to HSV and rotating the hue channel (H) by delta
- delta: float. How much to add to the hue channel
- name: A name for this operation
Returns:
The hue-adjusted image or images
"""
def adjust_saturation(image, saturation_factor, name=None):
"""
Adjust saturation of RGB images.
Parameters:
- image: RGB image or images. The image saturation is adjusted by converting the image to HSV and multiplying the saturation (S) channel by saturation_factor
- saturation_factor: float. Factor to multiply the saturation by
- name: A name for this operation
Returns:
The saturation-adjusted image or images
"""
def random_brightness(image, max_delta, seed=None):
"""
Adjust the brightness of images by a random factor.
Parameters:
- image: An image or images to adjust
- max_delta: float, must be non-negative
- seed: A Python integer. Used to create a random seed
Returns:
The brightness-adjusted image(s)
"""
def random_contrast(image, lower, upper, seed=None):
"""
Adjust the contrast of an image or images by a random factor.
Parameters:
- image: An image tensor with 3 or more dimensions
- lower: float. Lower bound for the random contrast factor
- upper: float. Upper bound for the random contrast factor
- seed: A Python integer. Used to create a random seed
Returns:
The contrast-adjusted tensor
"""Operations for measuring image quality and computing metrics.
def psnr(a, b, max_val, name=None):
"""
Returns the Peak Signal-to-Noise Ratio between a and b.
Parameters:
- a: First set of images
- b: Second set of images
- max_val: The dynamic range of the images (i.e., the difference between the maximum and the minimum allowed values)
- name: Namespace to embed the computation in
Returns:
The scalar PSNR between a and b. The returned tensor has type tf.float32 and shape [batch_size, 1]
"""
def ssim(img1, img2, max_val, filter_size=11, filter_sigma=1.5, k1=0.01, k2=0.03):
"""
Computes SSIM index between img1 and img2.
Parameters:
- img1: First image batch
- img2: Second image batch
- max_val: The dynamic range of the images (i.e., the difference between the maximum and the minimum allowed values)
- filter_size: Default value 11 (size of gaussian filter)
- filter_sigma: Default value 1.5 (width of gaussian filter)
- k1: Default value 0.01
- k2: Default value 0.03 (SSIM is less sensitive to K2 for lower values, so it should be larger than K1)
Returns:
A tensor containing an SSIM value for each image in batch
"""
def total_variation(images, name=None):
"""
Calculate and return the total variation for one or more images.
Parameters:
- images: A Tensor. Must be one of the following types: half, float32, float64
- name: A name for the operation
Returns:
A Tensor. Has the same type as images
"""import tensorflow as tf
import numpy as np
# Read and decode images (decode_jpeg returns a uint8 tensor)
image_string = tf.io.read_file('path/to/image.jpg')
image = tf.image.decode_jpeg(image_string, channels=3)
# Resize image
resized_image = tf.image.resize(image, [224, 224])
# Random augmentations
augmented_image = tf.image.random_flip_left_right(image)
augmented_image = tf.image.random_brightness(augmented_image, max_delta=0.1)
augmented_image = tf.image.random_contrast(augmented_image, lower=0.8, upper=1.2)
# Crop operations
central_cropped = tf.image.central_crop(image, central_fraction=0.8)
random_cropped = tf.image.random_crop(image, size=[100, 100, 3])
# Color space conversions
grayscale = tf.image.rgb_to_grayscale(image)
# rgb_to_hsv only accepts floating-point tensors (half, bfloat16, float32,
# float64), so scale the uint8 image to float32 before converting
hsv_image = tf.image.rgb_to_hsv(tf.cast(image, tf.float32) / 255.0)
# Image processing pipeline for training
def preprocess_image(image_path, label):
    """
    Load one JPEG file and turn it into an augmented training example.
    Parameters:
    - image_path: Path of the JPEG file to read
    - label: Label belonging to the image; passed through unchanged
    Returns:
    A tuple (image, label) where image has been decoded with 3 channels, resized to 224x224, divided by 255.0 and randomly augmented
    """
    image = tf.io.read_file(image_path)
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.resize(image, [224, 224])
    image = tf.cast(image, tf.float32) / 255.0
    # Data augmentation
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_brightness(image, max_delta=0.1)
    image = tf.image.random_contrast(image, lower=0.9, upper=1.1)
    return image, label
# Batch processing
batch_size = 32
# Example inputs: replace with your own image paths and their matching labels.
# Both lists must have the same length.
image_paths = ["path1.jpg", "path2.jpg"]  # List of image paths
labels = [0, 1]  # Corresponding labels
dataset = tf.data.Dataset.from_tensor_slices((image_paths, labels))
dataset = dataset.map(preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)
dataset = dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)
# Quality metrics
# Two random image batches; max_val=1.0 is passed as the dynamic range
img1 = tf.random.uniform([1, 256, 256, 3])
img2 = tf.random.uniform([1, 256, 256, 3])
psnr_value = tf.image.psnr(img1, img2, max_val=1.0)
ssim_value = tf.image.ssim(img1, img2, max_val=1.0)
print(f"PSNR: {psnr_value.numpy()}")
print(f"SSIM: {ssim_value.numpy()}")
Install with Tessl CLI
npx tessl i tessl/pypi-tensorflow