# Utility Functions

Specialized utilities for contour processing, perspective transforms, path handling, text rendering, image encoding, and temporary file management. These functions provide common operations needed in computer vision workflows.

## Capabilities

### Contour Processing

Utilities for sorting and labeling contours in computer vision applications.

```python { .api }
def sort_contours(cnts, method="left-to-right"):
    """
    Sort contours by position.

    Args:
        cnts (list): List of contours
        method (str): Sorting method (default: "left-to-right")
            Options: "left-to-right", "right-to-left",
            "top-to-bottom", "bottom-to-top"

    Returns:
        tuple: (sorted_contours, sorted_bounding_boxes)
    """

def label_contour(image, c, i, color=(0, 255, 0), thickness=2):
    """
    Label contour with number.

    Args:
        image (np.ndarray): Input image
        c (np.ndarray): Contour to label
        i (int): Label number (will display as i+1)
        color (tuple): BGR color for drawing (default: (0, 255, 0))
        thickness (int): Line thickness (default: 2)

    Returns:
        np.ndarray: Image with labeled contour
    """
```

**Usage Example:**
```python
import cv2
import imutils
from imutils import contours

# Load and preprocess image
image = cv2.imread("objects.jpg")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blurred = cv2.GaussianBlur(gray, (5, 5), 0)
thresh = cv2.threshold(blurred, 60, 255, cv2.THRESH_BINARY)[1]

# Find contours
cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = imutils.grab_contours(cnts)

# Sort contours from left to right
(cnts, boundingBoxes) = contours.sort_contours(cnts, method="left-to-right")

# Label each contour
output = image.copy()
for (i, c) in enumerate(cnts):
    output = contours.label_contour(output, c, i)

cv2.imshow("Sorted and Labeled Contours", output)
cv2.waitKey(0)
cv2.destroyAllWindows()
```
### Perspective Transformation

Functions for perspective correction and bird's-eye view transformations.

```python { .api }
def order_points(pts):
    """
    Order quadrilateral points in consistent order.

    Args:
        pts (np.ndarray): 4 points defining a quadrilateral

    Returns:
        np.ndarray: Points ordered as [top-left, top-right, bottom-right, bottom-left]
    """

def four_point_transform(image, pts):
    """
    Apply perspective transform for bird's eye view.

    Args:
        image (np.ndarray): Input image
        pts (np.ndarray): 4 corner points of the region to transform

    Returns:
        np.ndarray: Transformed image with rectangular perspective

    Note:
        Automatically calculates the destination rectangle dimensions.
        Points are ordered using order_points() for consistency.
    """
```

**Usage Example:**
```python
import cv2
import numpy as np
import imutils
from imutils import perspective

# Load image
image = cv2.imread("document.jpg")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Find edges and contours (document detection)
edged = cv2.Canny(gray, 75, 200)
cnts = cv2.findContours(edged, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
cnts = imutils.grab_contours(cnts)
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:5]

# Find the document contour
for c in cnts:
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.02 * peri, True)

    if len(approx) == 4:
        screenCnt = approx
        break

# Apply perspective transform
pts = screenCnt.reshape(4, 2)
warped = perspective.four_point_transform(image, pts)

cv2.imshow("Original", image)
cv2.imshow("Scanned", warped)
cv2.waitKey(0)
cv2.destroyAllWindows()
```
### Object Detection Utilities

Non-maximum suppression for object detection post-processing.

```python { .api }
def non_max_suppression(boxes, probs=None, overlapThresh=0.3):
    """
    Apply non-maximum suppression to bounding boxes.

    Args:
        boxes (np.ndarray): Array of bounding boxes (x1, y1, x2, y2) format
        probs (np.ndarray, optional): Confidence scores for each box
        overlapThresh (float): Overlap threshold for suppression (default: 0.3)

    Returns:
        np.ndarray: Array of selected bounding boxes after NMS

    Note:
        If probs is None, boxes are sorted by bottom-right y-coordinate.
        Otherwise, boxes are sorted by confidence scores.
    """
```

**Usage Example:**
```python
import cv2
import numpy as np
from imutils import object_detection

# Example bounding boxes and confidence scores
boxes = np.array([
    [100, 100, 200, 200],
    [120, 120, 220, 220],
    [300, 300, 400, 400],
    [310, 310, 410, 410]
])

confidence_scores = np.array([0.9, 0.8, 0.95, 0.85])

# Apply non-maximum suppression
selected_boxes = object_detection.non_max_suppression(
    boxes, probs=confidence_scores, overlapThresh=0.3
)

print(f"Original boxes: {len(boxes)}")
print(f"After NMS: {len(selected_boxes)}")

# Draw results on image
image = np.zeros((500, 500, 3), dtype=np.uint8)

# Draw all original boxes in red
for (x1, y1, x2, y2) in boxes:
    cv2.rectangle(image, (x1, y1), (x2, y2), (0, 0, 255), 1)

# Draw selected boxes in green (thicker)
for (x1, y1, x2, y2) in selected_boxes:
    cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)

cv2.imshow("Non-Maximum Suppression", image)
cv2.waitKey(0)
cv2.destroyAllWindows()
```
### Path and File Utilities

Functions for listing image files and working with file paths.

```python { .api }
def list_images(basePath, contains=None):
    """
    List image files in directory.

    Args:
        basePath (str): Base directory path to search
        contains (str, optional): String that filename must contain

    Returns:
        generator: Generator yielding image file paths

    Note:
        Searches recursively through directory structure.
        Supported extensions: .jpg, .jpeg, .png, .bmp, .tif, .tiff
    """

def list_files(basePath, validExts=None, contains=None):
    """
    List files with optional filtering.

    Args:
        basePath (str): Base directory path to search
        validExts (tuple, optional): Valid file extensions to include
        contains (str, optional): String that filename must contain

    Returns:
        generator: Generator yielding file paths
    """

# Constants
image_types = (".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff")
```

**Usage Example:**
```python
from imutils import paths
import cv2

# List all images in directory
image_paths = list(paths.list_images("dataset/"))
print(f"Found {len(image_paths)} images")

# List images containing "face" in filename
face_images = list(paths.list_images("dataset/", contains="face"))
print(f"Found {len(face_images)} face images")

# List all Python files
python_files = list(paths.list_files("project/", validExts=(".py",)))
print(f"Found {len(python_files)} Python files")

# Process all images in directory
for image_path in paths.list_images("input_images/"):
    print(f"Processing {image_path}")
    image = cv2.imread(image_path)

    # Process image here
    processed = cv2.GaussianBlur(image, (15, 15), 0)

    # Save processed image
    output_path = image_path.replace("input_images", "output_images")
    cv2.imwrite(output_path, processed)
```
### Text Rendering

Utilities for drawing text with line breaks and centering.

```python { .api }
def put_text(img, text, org, font_face, font_scale, color, thickness=1,
             line_type=8, bottom_left_origin=False):
    """
    Draw multi-line text with line breaks.

    Args:
        img (np.ndarray): Image to draw on (modified in place)
        text (str): Text string (use \n for line breaks)
        org (tuple): (x, y) position of first line bottom-left corner
        font_face (int): OpenCV font type
        font_scale (float): Font scale factor
        color (tuple): Text color (B, G, R)
        thickness (int): Text thickness (default: 1)
        line_type (int): Line type (default: 8)
        bottom_left_origin (bool): Coordinate system origin (default: False)

    Returns:
        None: Image is modified in place
    """

def put_centered_text(img, text, font_face, font_scale, color, thickness=1, line_type=8):
    """
    Draw vertically and horizontally centered multi-line text.

    Args:
        img (np.ndarray): Image to draw on (modified in place)
        text (str): Text string (use \n for line breaks)
        font_face (int): OpenCV font type
        font_scale (float): Font scale factor
        color (tuple): Text color (B, G, R)
        thickness (int): Text thickness (default: 1)
        line_type (int): Line type (default: 8)

    Returns:
        None: Image is modified in place
    """
```

**Usage Example:**
```python
import cv2
import numpy as np
from imutils import text

# Create blank image
img = np.zeros((400, 600, 3), dtype=np.uint8)

# Multi-line text with line breaks
multiline_text = "This is line 1\nThis is line 2\nThis is line 3"

# Draw text at specific position
text.put_text(img, multiline_text, (50, 100),
              cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)

# Create another image for centered text
img2 = np.zeros((300, 500, 3), dtype=np.uint8)

centered_text = "Centered Text\nLine 2\nLine 3"
text.put_centered_text(img2, centered_text,
                       cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2)

cv2.imshow("Multi-line Text", img)
cv2.imshow("Centered Text", img2)
cv2.waitKey(0)
cv2.destroyAllWindows()
```
### Image Encoding Utilities

Functions for encoding and decoding images as base64 for transmission or storage.

```python { .api }
def base64_encode_image(a):
    """
    Encode image array to base64 JSON string.

    Args:
        a (np.ndarray): Image array

    Returns:
        str: JSON string containing base64 data, dtype, and shape
    """

def base64_decode_image(a):
    """
    Decode base64 JSON string to image array.

    Args:
        a (str): JSON string from base64_encode_image

    Returns:
        np.ndarray: Decoded image array
    """

def base64_encode_array(a):
    """
    Encode numpy array to base64.

    Args:
        a (np.ndarray): Numpy array

    Returns:
        bytes: Base64 encoded data
    """

def base64_decode_array(a, dtype):
    """
    Decode base64 to numpy array.

    Args:
        a (bytes): Base64 encoded data
        dtype (str): NumPy data type

    Returns:
        np.ndarray: Decoded array
    """
```

**Usage Example:**
```python
import cv2
import numpy as np
from imutils import encodings

# Load image
image = cv2.imread("example.jpg")

# Encode image as base64 JSON
encoded = encodings.base64_encode_image(image)
print(f"Encoded size: {len(encoded)} characters")

# Decode back to image
decoded_image = encodings.base64_decode_image(encoded)

# Verify images are identical
are_equal = np.array_equal(image, decoded_image)
print(f"Images are identical: {are_equal}")

# Save decoded image
cv2.imwrite("decoded_image.jpg", decoded_image)

# Example of encoding/decoding just arrays
array_data = np.array([1, 2, 3, 4, 5], dtype=np.float32)
encoded_array = encodings.base64_encode_array(array_data)
decoded_array = encodings.base64_decode_array(encoded_array, "float32")

print(f"Array encoding/decoding successful: {np.array_equal(array_data, decoded_array)}")
```
### Temporary File Management

Utility class for creating and managing temporary files.

```python { .api }
class TempFile:
    def __init__(self, basePath="./", ext=".jpg"):
        """
        Create temporary file with unique name.

        Args:
            basePath (str): Base directory path (default: "./")
            ext (str): File extension (default: ".jpg")

        Attributes:
            path (str): Full path to temporary file
        """

    def cleanup(self):
        """Remove the temporary file from filesystem."""
```

**Usage Example:**
```python
import cv2
import numpy as np
from imutils.io import TempFile

# Create some test data
test_image = np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8)

# Create temporary file
temp_file = TempFile(basePath="temp/", ext=".png")
print(f"Temporary file: {temp_file.path}")

# Write image to temporary file
cv2.imwrite(temp_file.path, test_image)

# Read image back
loaded_image = cv2.imread(temp_file.path)
print(f"Image shape: {loaded_image.shape}")

# Cleanup temporary file
temp_file.cleanup()
print("Temporary file removed")

# Context manager style usage
class TempFileContext:
    def __init__(self, basePath="./", ext=".jpg"):
        self.temp_file = TempFile(basePath, ext)

    def __enter__(self):
        return self.temp_file

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.temp_file.cleanup()

# Usage with context manager
with TempFileContext("temp/", ".png") as temp:
    cv2.imwrite(temp.path, test_image)
    # Process file...
    processed = cv2.imread(temp.path)
    # File automatically cleaned up when exiting context
```
### Complete Utility Pipeline Example

Here's a comprehensive example using multiple utility functions:

```python
import cv2
import numpy as np
import imutils
from imutils import contours, perspective, object_detection, paths, text
from imutils.io import TempFile

def process_document_images(input_dir, output_dir):
    """
    Process document images: detect documents, apply perspective correction,
    and save results with labels.
    """
    # List all images in input directory
    image_paths = list(paths.list_images(input_dir))
    print(f"Processing {len(image_paths)} images...")

    for i, image_path in enumerate(image_paths):
        print(f"Processing image {i+1}/{len(image_paths)}: {image_path}")

        # Load image
        original = cv2.imread(image_path)
        if original is None:
            continue

        # Resize for processing
        image = imutils.resize(original, height=500)
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # Edge detection and contour finding
        edged = imutils.auto_canny(gray)
        cnts = cv2.findContours(edged, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
        cnts = imutils.grab_contours(cnts)
        cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:5]

        # Find document contour (largest 4-sided contour)
        document_contour = None
        for c in cnts:
            peri = cv2.arcLength(c, True)
            approx = cv2.approxPolyDP(c, 0.015 * peri, True)

            if len(approx) == 4:
                document_contour = approx
                break

        if document_contour is not None:
            # Apply perspective transform
            pts = document_contour.reshape(4, 2)
            warped = perspective.four_point_transform(image, pts)

            # Create output image with labels
            output = np.zeros((warped.shape[0] + 100, warped.shape[1], 3), dtype=np.uint8)
            output[100:, :] = warped

            # Add title text
            filename = image_path.split("/")[-1]
            title_text = f"Processed: {filename}\nDocument {i+1}/{len(image_paths)}"
            text.put_centered_text(output[:100, :], title_text,
                                   cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)

            # Save result using temporary file first (for atomic write)
            output_path = f"{output_dir}/processed_{i+1:03d}.jpg"
            temp_file = TempFile(basePath=output_dir, ext=".jpg")

            try:
                cv2.imwrite(temp_file.path, output)
                # Move temp file to final location (atomic operation)
                import shutil
                shutil.move(temp_file.path, output_path)
                print(f"Saved: {output_path}")
            except Exception as e:
                print(f"Error saving {output_path}: {e}")
                temp_file.cleanup()
        else:
            print(f"No document found in {image_path}")

def analyze_objects_in_image(image_path):
    """
    Analyze objects in image: find contours, sort them, and apply object detection.
    """
    # Load image
    image = cv2.imread(image_path)
    original = image.copy()

    # Preprocessing
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (7, 7), 0)
    thresh = cv2.threshold(blurred, 60, 255, cv2.THRESH_BINARY)[1]

    # Find and sort contours
    cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = imutils.grab_contours(cnts)

    if len(cnts) == 0:
        print("No contours found")
        return

    # Sort contours from left to right
    (sorted_cnts, bounding_boxes) = contours.sort_contours(cnts, method="left-to-right")

    # Label contours
    labeled_image = original.copy()
    for (i, c) in enumerate(sorted_cnts):
        labeled_image = contours.label_contour(labeled_image, c, i)

    # Create bounding boxes for object detection simulation
    boxes = []
    for box in bounding_boxes:
        x, y, w, h = box
        boxes.append([x, y, x + w, y + h])

    boxes = np.array(boxes)

    # Simulate confidence scores
    confidence_scores = np.random.uniform(0.6, 0.9, len(boxes))

    # Apply non-maximum suppression
    if len(boxes) > 0:
        selected_boxes = object_detection.non_max_suppression(
            boxes, probs=confidence_scores, overlapThresh=0.3
        )

        # Draw results
        nms_image = original.copy()
        for (x1, y1, x2, y2) in selected_boxes:
            cv2.rectangle(nms_image, (x1, y1), (x2, y2), (0, 255, 0), 2)

        # Create comparison display
        comparison = np.hstack([labeled_image, nms_image])

        # Add labels
        text.put_text(comparison, "Sorted Contours", (10, 30),
                      cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)

        text.put_text(comparison, "After NMS", (labeled_image.shape[1] + 10, 30),
                      cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)

        cv2.imshow("Object Analysis", comparison)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

        print(f"Found {len(cnts)} total contours")
        print(f"After NMS: {len(selected_boxes)} objects")

# Usage examples
if __name__ == "__main__":
    # Process document images
    process_document_images("input_documents/", "output_documents/")

    # Analyze objects in single image
    analyze_objects_in_image("objects.jpg")
```