CtrlK
BlogDocsLog inGet started
Tessl Logo

tessl/pypi-imutils

A series of convenience functions that make basic image processing tasks — such as translation, rotation, resizing, skeletonization, displaying Matplotlib images, sorting contours, and edge detection — easier with OpenCV, under both Python 2.7 and Python 3.

91

1.33x
Overview
Eval results
Files

docs/utilities.md

Utility Functions

Specialized utilities for contour processing, perspective transforms, path handling, text rendering, image encoding, and temporary file management. These functions provide common operations needed in computer vision workflows.

Capabilities

Contour Processing

Utilities for sorting and labeling contours in computer vision applications.

def sort_contours(cnts, method="left-to-right"):
    """
    Sort a list of contours by their spatial position in the image.

    Args:
        cnts (list): Contours to sort.
        method (str): Sort direction; one of "left-to-right" (default),
                      "right-to-left", "top-to-bottom", or "bottom-to-top".

    Returns:
        tuple: (sorted_contours, sorted_bounding_boxes), where the bounding
               boxes are (x, y, w, h) tuples in the same order.
    """

def label_contour(image, c, i, color=(0, 255, 0), thickness=2):
    """
    Draw a contour outline plus its 1-based index number onto an image.

    Args:
        image (np.ndarray): Image to annotate.
        c (np.ndarray): The contour to outline.
        i (int): Zero-based index; rendered on the image as i + 1.
        color (tuple): BGR drawing color (default: (0, 255, 0)).
        thickness (int): Stroke thickness in pixels (default: 2).

    Returns:
        np.ndarray: The annotated image.
    """

Usage Example:

import cv2
import imutils
from imutils import contours

# Read the source image and turn it into a binary object mask.
img = cv2.imread("objects.jpg")
grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
smoothed = cv2.GaussianBlur(grayscale, (5, 5), 0)
binary = cv2.threshold(smoothed, 60, 255, cv2.THRESH_BINARY)[1]

# Extract the external contours from the mask.
found = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
found = imutils.grab_contours(found)

# Order the contours spatially, left to right.
(found, boxes) = contours.sort_contours(found, method="left-to-right")

# Annotate each contour with its index on a copy of the input.
annotated = img.copy()
for idx, cnt in enumerate(found):
    annotated = contours.label_contour(annotated, cnt, idx)

cv2.imshow("Sorted and Labeled Contours", annotated)
cv2.waitKey(0)
cv2.destroyAllWindows()

Perspective Transformation

Functions for perspective correction and bird's-eye view transformations.

def order_points(pts):
    """
    Arrange the four corners of a quadrilateral into a canonical order.

    Args:
        pts (np.ndarray): Four (x, y) points describing a quadrilateral.

    Returns:
        np.ndarray: The same points reordered as
                    [top-left, top-right, bottom-right, bottom-left].
    """

def four_point_transform(image, pts):
    """
    Warp a quadrilateral region of an image into a top-down ("bird's eye")
    rectangular view.

    Args:
        image (np.ndarray): Source image.
        pts (np.ndarray): Four corner points of the region to rectify.

    Returns:
        np.ndarray: The perspective-corrected image.

    Note:
        The output rectangle's width and height are derived from the input
        points, which are first normalized via order_points().
    """

Usage Example:

import cv2
import numpy as np
import imutils  # fix: imutils.grab_contours is used below but was never imported
from imutils import perspective

# Load image
image = cv2.imread("document.jpg")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Find edges and contours (document detection)
edged = cv2.Canny(gray, 75, 200)
cnts = cv2.findContours(edged, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
cnts = imutils.grab_contours(cnts)
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:5]

# Find the document contour (largest contour that approximates to 4 points)
screenCnt = None
for c in cnts:
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.02 * peri, True)

    if len(approx) == 4:
        screenCnt = approx
        break

# Fix: without this guard the original example raised a NameError when no
# quadrilateral contour was present in the image.
if screenCnt is None:
    raise RuntimeError("No 4-point document contour found")

# Apply perspective transform
pts = screenCnt.reshape(4, 2)
warped = perspective.four_point_transform(image, pts)

cv2.imshow("Original", image)
cv2.imshow("Scanned", warped)
cv2.waitKey(0)
cv2.destroyAllWindows()

Object Detection Utilities

Non-maximum suppression for object detection post-processing.

def non_max_suppression(boxes, probs=None, overlapThresh=0.3):
    """
    Prune overlapping detections, keeping only the strongest boxes.

    Args:
        boxes (np.ndarray): Bounding boxes in (x1, y1, x2, y2) format.
        probs (np.ndarray, optional): Per-box confidence scores.
        overlapThresh (float): IoU-style overlap ratio above which a box
                               is suppressed (default: 0.3).

    Returns:
        np.ndarray: The surviving bounding boxes.

    Note:
        With probs given, suppression order follows confidence; without it,
        boxes are ranked by their bottom-right y-coordinate.
    """

Usage Example:

import cv2
import numpy as np
from imutils import object_detection

# Two clusters of heavily-overlapping detections with their scores.
boxes = np.array([
    [100, 100, 200, 200],
    [120, 120, 220, 220],
    [300, 300, 400, 400],
    [310, 310, 410, 410]
])

confidence_scores = np.array([0.9, 0.8, 0.95, 0.85])

# Suppress the weaker, overlapping boxes.
selected_boxes = object_detection.non_max_suppression(
    boxes, probs=confidence_scores, overlapThresh=0.3
)

print(f"Original boxes: {len(boxes)}")
print(f"After NMS: {len(selected_boxes)}")

# Visualize: black canvas, raw detections vs. survivors.
image = np.zeros((500, 500, 3), dtype=np.uint8)

# Every raw detection in thin red.
for (x1, y1, x2, y2) in boxes:
    cv2.rectangle(image, (x1, y1), (x2, y2), (0, 0, 255), 1)

# Each surviving detection in thick green.
for (x1, y1, x2, y2) in selected_boxes:
    cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)

cv2.imshow("Non-Maximum Suppression", image)
cv2.waitKey(0)
cv2.destroyAllWindows()

Path and File Utilities

Functions for listing image files and working with file paths.

def list_images(basePath, contains=None):
    """
    Lazily yield the paths of all image files beneath a directory.

    Args:
        basePath (str): Root directory to walk.
        contains (str, optional): Keep only filenames containing this
                                  substring.

    Returns:
        generator: Yields one image file path at a time.

    Note:
        The walk is recursive. Recognized extensions: .jpg, .jpeg, .png,
        .bmp, .tif, .tiff
    """

def list_files(basePath, validExts=None, contains=None):
    """
    Lazily yield file paths beneath a directory, with optional filtering.

    Args:
        basePath (str): Root directory to walk.
        validExts (tuple, optional): Only yield files with one of these
                                     extensions.
        contains (str, optional): Keep only filenames containing this
                                  substring.

    Returns:
        generator: Yields one file path at a time.
    """

# Image file extensions recognized by list_images()
image_types = (".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff")

Usage Example:

from imutils import paths
import cv2

# Every image anywhere under the dataset directory.
all_images = list(paths.list_images("dataset/"))
print(f"Found {len(all_images)} images")

# Only images whose filename mentions "face".
face_hits = list(paths.list_images("dataset/", contains="face"))
print(f"Found {len(face_hits)} face images")

# Arbitrary extensions work too: collect the Python sources.
py_files = list(paths.list_files("project/", validExts=(".py",)))
print(f"Found {len(py_files)} Python files")

# Stream through a directory, blurring each image as it is visited.
for src_path in paths.list_images("input_images/"):
    print(f"Processing {src_path}")
    frame = cv2.imread(src_path)

    # Apply the per-image transformation here.
    result = cv2.GaussianBlur(frame, (15, 15), 0)

    # Mirror the input layout under the output directory.
    dst_path = src_path.replace("input_images", "output_images")
    cv2.imwrite(dst_path, result)

Text Rendering

Utilities for drawing text with line breaks and centering.

def put_text(img, text, org, font_face, font_scale, color, thickness=1, 
             line_type=8, bottom_left_origin=False):
    """
    Render text onto an image, honoring embedded line breaks.

    Args:
        img (np.ndarray): Target image; drawn on in place.
        text (str): The text to render; \\n starts a new line.
        org (tuple): (x, y) of the bottom-left corner of the first line.
        font_face (int): An OpenCV font constant.
        font_scale (float): Scale factor applied to the font's base size.
        color (tuple): Text color as (B, G, R).
        thickness (int): Stroke thickness (default: 1).
        line_type (int): OpenCV line type (default: 8).
        bottom_left_origin (bool): If True, the origin is the image's
                                   bottom-left corner (default: False).

    Returns:
        None: The image is mutated directly.
    """

def put_centered_text(img, text, font_face, font_scale, color, thickness=1, line_type=8):
    """
    Render multi-line text centered both horizontally and vertically.

    Args:
        img (np.ndarray): Target image; drawn on in place.
        text (str): The text to render; \\n starts a new line.
        font_face (int): An OpenCV font constant.
        font_scale (float): Scale factor applied to the font's base size.
        color (tuple): Text color as (B, G, R).
        thickness (int): Stroke thickness (default: 1).
        line_type (int): OpenCV line type (default: 8).

    Returns:
        None: The image is mutated directly.
    """

Usage Example:

import cv2
import numpy as np
from imutils import text

# Black canvas to draw on.
canvas = np.zeros((400, 600, 3), dtype=np.uint8)

# The renderer splits on embedded line breaks.
message = "This is line 1\\nThis is line 2\\nThis is line 3"

# Anchor the first line's baseline at a fixed point.
text.put_text(canvas, message, (50, 100), 
              cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)

# A second canvas demonstrates automatic centering.
canvas2 = np.zeros((300, 500, 3), dtype=np.uint8)

banner = "Centered Text\\nLine 2\\nLine 3"
text.put_centered_text(canvas2, banner, 
                      cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2)

cv2.imshow("Multi-line Text", canvas)
cv2.imshow("Centered Text", canvas2)
cv2.waitKey(0)
cv2.destroyAllWindows()

Image Encoding Utilities

Functions for encoding and decoding images as base64 for transmission or storage.

def base64_encode_image(a):
    """
    Serialize an image array into a JSON string of base64 data.

    Args:
        a (np.ndarray): The image to serialize.

    Returns:
        str: JSON carrying the base64 payload plus the dtype and shape
             needed to reconstruct the array.
    """

def base64_decode_image(a):
    """
    Reconstruct an image array from a JSON string produced by
    base64_encode_image().

    Args:
        a (str): The JSON-encoded image.

    Returns:
        np.ndarray: The restored image array.
    """

def base64_encode_array(a):
    """
    Base64-encode a raw numpy array's buffer.

    Args:
        a (np.ndarray): The array to encode.

    Returns:
        bytes: The base64-encoded bytes.
    """

def base64_decode_array(a, dtype):
    """
    Rebuild a numpy array from base64-encoded bytes.

    Args:
        a (bytes): Base64 payload produced by base64_encode_array().
        dtype (str): NumPy dtype name used to interpret the buffer.

    Returns:
        np.ndarray: The decoded array.
    """

Usage Example:

import cv2
import numpy as np  # fix: np is used below but was never imported
from imutils import encodings

# Load image
image = cv2.imread("example.jpg")

# Encode image as base64 JSON
encoded = encodings.base64_encode_image(image)
print(f"Encoded size: {len(encoded)} characters")

# Decode back to image
decoded_image = encodings.base64_decode_image(encoded)

# Verify images are identical (round-trip should be lossless)
are_equal = np.array_equal(image, decoded_image)
print(f"Images are identical: {are_equal}")

# Save decoded image
cv2.imwrite("decoded_image.jpg", decoded_image)

# Example of encoding/decoding just arrays
array_data = np.array([1, 2, 3, 4, 5], dtype=np.float32)
encoded_array = encodings.base64_encode_array(array_data)
decoded_array = encodings.base64_decode_array(encoded_array, "float32")

print(f"Array encoding/decoding successful: {np.array_equal(array_data, decoded_array)}")

Temporary File Management

Utility class for creating and managing temporary files.

class TempFile:
    def __init__(self, basePath="./", ext=".jpg"):
        """
        Build a temporary file with a randomly generated unique name.

        Args:
            basePath (str): Directory in which the file lives (default: "./").
            ext (str): Extension appended to the generated name
                       (default: ".jpg").

        Attributes:
            path (str): Location of the temporary file on disk.
        """
    
    def cleanup(self):
        """Delete the temporary file from disk."""

Usage Example:

import cv2
import numpy as np
from imutils.io import TempFile

# Random noise image to round-trip through disk.
sample = np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8)

# Ask for a throwaway PNG under temp/.
tmp = TempFile(basePath="temp/", ext=".png")
print(f"Temporary file: {tmp.path}")

# Persist the image at the temporary path.
cv2.imwrite(tmp.path, sample)

# Round-trip: load it back and inspect.
restored = cv2.imread(tmp.path)
print(f"Image shape: {restored.shape}")

# Delete the file when done.
tmp.cleanup()
print("Temporary file removed")

# Context manager style usage
class TempFileContext:
    """Adapt imutils' TempFile to the ``with`` statement protocol."""

    def __init__(self, basePath="./", ext=".jpg"):
        # Delegate file creation to the wrapped TempFile helper.
        self.temp_file = TempFile(basePath, ext)

    def __enter__(self):
        # Expose the managed TempFile to the ``with`` body.
        return self.temp_file

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Remove the file on exit, whether or not an exception occurred.
        self.temp_file.cleanup()

# Usage with context manager
with TempFileContext("temp/", ".png") as scratch:
    cv2.imwrite(scratch.path, test_image)
    # Work with the file while it exists...
    reloaded = cv2.imread(scratch.path)
    # Leaving the ``with`` block deletes the temporary file.
Complete Utility Pipeline Example

Here's a comprehensive example using multiple utility functions:

import cv2
import numpy as np
import imutils
from imutils import contours, perspective, object_detection, paths, text
from imutils.io import TempFile

def process_document_images(input_dir, output_dir):
    """
    Detect a document in every image under input_dir, warp it to a
    top-down view, stack a 100px title banner above it, and save the
    result to output_dir.

    Args:
        input_dir (str): Directory searched recursively for input images.
        output_dir (str): Directory that receives the processed JPEGs;
                          assumed to already exist.
    """
    # Hoisted out of the per-image loop (the original re-imported shutil
    # on every iteration).
    import os
    import shutil

    # List all images in input directory
    image_paths = list(paths.list_images(input_dir))
    print(f"Processing {len(image_paths)} images...")
    
    for i, image_path in enumerate(image_paths):
        print(f"Processing image {i+1}/{len(image_paths)}: {image_path}")
        
        # Load image; skip unreadable/corrupt files
        original = cv2.imread(image_path)
        if original is None:
            continue
            
        # Resize for processing
        image = imutils.resize(original, height=500)
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        
        # Edge detection and contour finding
        edged = imutils.auto_canny(gray)
        cnts = cv2.findContours(edged, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
        cnts = imutils.grab_contours(cnts)
        cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:5]
        
        # Find document contour (largest 4-sided contour)
        document_contour = None
        for c in cnts:
            peri = cv2.arcLength(c, True)
            approx = cv2.approxPolyDP(c, 0.015 * peri, True)
            
            if len(approx) == 4:
                document_contour = approx
                break
        
        if document_contour is None:
            print(f"No document found in {image_path}")
            continue
        
        # Apply perspective transform
        pts = document_contour.reshape(4, 2)
        warped = perspective.four_point_transform(image, pts)
        
        # Create output image with a 100px banner row above the document
        output = np.zeros((warped.shape[0] + 100, warped.shape[1], 3), dtype=np.uint8)
        output[100:, :] = warped
        
        # Add title text.  Fix: the original computed `filename` but then
        # rendered the literal string "(unknown)" instead of interpolating
        # it; os.path.basename is also portable across path separators.
        filename = os.path.basename(image_path)
        title_text = f"Processed: {filename}\\nDocument {i+1}/{len(image_paths)}"
        text.put_centered_text(output[:100, :], title_text,
                               cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
        
        # Save result using temporary file first (for atomic write)
        output_path = f"{output_dir}/processed_{i+1:03d}.jpg"
        temp_file = TempFile(basePath=output_dir, ext=".jpg")
        
        try:
            cv2.imwrite(temp_file.path, output)
            # Move temp file to final location (atomic operation)
            shutil.move(temp_file.path, output_path)
            print(f"Saved: {output_path}")
        except Exception as e:
            print(f"Error saving {output_path}: {e}")
            temp_file.cleanup()

def analyze_objects_in_image(image_path):
    """
    Find and sort object contours in an image, label them, run
    non-maximum suppression over their bounding boxes, and display a
    side-by-side comparison of both results.

    Args:
        image_path (str): Path of the image to analyze.
    """
    # Load image; fix: the original crashed with AttributeError on
    # `image.copy()` when the path was missing or unreadable.
    image = cv2.imread(image_path)
    if image is None:
        print(f"Could not read image: {image_path}")
        return
    original = image.copy()
    
    # Preprocessing: grayscale -> blur -> binary threshold
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (7, 7), 0)
    thresh = cv2.threshold(blurred, 60, 255, cv2.THRESH_BINARY)[1]
    
    # Find and sort contours
    cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = imutils.grab_contours(cnts)
    
    if len(cnts) == 0:
        print("No contours found")
        return
    
    # Sort contours from left to right
    (sorted_cnts, bounding_boxes) = contours.sort_contours(cnts, method="left-to-right")
    
    # Label contours
    labeled_image = original.copy()
    for (i, c) in enumerate(sorted_cnts):
        labeled_image = contours.label_contour(labeled_image, c, i)
    
    # Convert (x, y, w, h) bounding boxes to (x1, y1, x2, y2) corners
    boxes = []
    for box in bounding_boxes:
        x, y, w, h = box
        boxes.append([x, y, x + w, y + h])
    
    boxes = np.array(boxes)
    
    # Simulate confidence scores (demo only -- nondeterministic)
    confidence_scores = np.random.uniform(0.6, 0.9, len(boxes))
    
    # Apply non-maximum suppression
    if len(boxes) > 0:
        selected_boxes = object_detection.non_max_suppression(
            boxes, probs=confidence_scores, overlapThresh=0.3
        )
        
        # Draw surviving boxes
        nms_image = original.copy()
        for (x1, y1, x2, y2) in selected_boxes:
            cv2.rectangle(nms_image, (x1, y1), (x2, y2), (0, 255, 0), 2)
        
        # Create comparison display: labeled contours | NMS result
        comparison = np.hstack([labeled_image, nms_image])
        
        # Caption each half
        text.put_text(comparison, "Sorted Contours", (10, 30),
                     cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
        
        text.put_text(comparison, "After NMS", (labeled_image.shape[1] + 10, 30),
                     cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
        
        cv2.imshow("Object Analysis", comparison)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
        
        print(f"Found {len(cnts)} total contours")
        print(f"After NMS: {len(selected_boxes)} objects")

# Usage examples (run only when executed as a script)
if __name__ == "__main__":
    # Batch-convert every document image found under input_documents/
    process_document_images("input_documents/", "output_documents/")
    
    # Run contour sorting + NMS analysis on one image
    analyze_objects_in_image("objects.jpg")

Install with Tessl CLI

npx tessl i tessl/pypi-imutils

docs

core-processing.md

face-analysis.md

feature-detection.md

index.md

utilities.md

video-processing.md

tile.json