0
# Image Processing and Template Matching
1
2
Computer vision capabilities for image-based UI automation, template matching, and visual element detection. Useful when traditional element selection methods are insufficient.
3
4
## Capabilities
5
6
### Image-based Element Detection
7
8
Detect and interact with UI elements using template images.
9
10
```python { .api }
11
class Device:
    @cached_property
    def image(self) -> ImageX:
        """Image processing entry point for this device; the value is cached after first access."""
15
16
class ImageX:
    def __init__(self, device: Device):
        """Create the image helper, keeping a reference to ``device``."""

    def click(self, template, **kwargs):
        """
        Click the element that matches a template image.

        Parameters:
        - template: path to the template image, or a PIL Image
        - threshold: minimum match confidence, between 0.0 and 1.0
        - timeout: how long to wait for the element to appear
        - method: template matching method to use
        """

    def match(self, template, **kwargs) -> List[Dict]:
        """
        Return every match of a template image.

        Parameters:
        - template: path to the template image, or a PIL Image
        - threshold: minimum match confidence
        - method: template matching method to use

        Returns:
        A list of dictionaries, one per match, each carrying the match
        position and its confidence
        """

    def wait(self, template, timeout: float = 20.0, **kwargs) -> bool:
        """
        Block until a template image appears.

        Parameters:
        - template: path to the template image, or a PIL Image
        - timeout: upper bound on the wait
        - threshold: minimum match confidence

        Returns:
        bool: True if the template was found
        """
56
```
57
58
Usage examples:
59
60
```python
61
import time  # needed by the polling loop below

d = u2.connect()

# Click on button using template image
d.image.click("button_template.png", threshold=0.8)

# Find all matches for an icon
matches = d.image.match("icon.png", threshold=0.9)
for match in matches:
    print(f"Found at: {match['result']}, confidence: {match['confidence']}")

# Wait for loading spinner to appear
if d.image.wait("loading_spinner.png", timeout=10):
    print("Loading started")

# Wait for loading to finish (template disappears):
# match() returns a list, so an empty (falsy) result ends the loop
while d.image.match("loading_spinner.png", threshold=0.8):
    time.sleep(0.5)
print("Loading finished")
79
```
80
81
### Template Matching Methods
82
83
Choose the matching algorithm with the `method` argument. The available methods differ in accuracy and speed, and in how the `threshold` value is interpreted.
84
85
```python
86
d = u2.connect()

# The matching algorithm is selected with the `method` keyword
d.image.click("button.png", method="cv2.TM_CCOEFF_NORMED")  # Default, good balance
d.image.click("icon.png", method="cv2.TM_SQDIFF_NORMED")    # Good for exact matches
d.image.click("logo.png", method="cv2.TM_CCORR_NORMED")     # Fast but less accurate

# The threshold has to suit the chosen method
d.image.click("element.png", method="cv2.TM_CCOEFF_NORMED", threshold=0.9)
d.image.click("element.png", method="cv2.TM_SQDIFF_NORMED", threshold=0.1)  # Lower is better for SQDIFF
96
```
97
98
### Screen Region Processing
99
100
Process specific screen regions for improved performance and accuracy.
101
102
```python
103
d = u2.connect()

# Capture the full screen for manual inspection
screenshot = d.screenshot()

# Region of interest, given as (left, top, right, bottom)
region = (100, 100, 500, 400)
cropped = screenshot.crop(region)
cropped.save("region.png")

# Restrict matching to the region
# NOTE(review): `region` is not among the parameters documented for match() - confirm it is supported
matches = d.image.match("target.png", region=region)

# Shift every match by the region's top-left corner
# NOTE(review): assumes match positions are relative to the region - confirm
left, top = region[0], region[1]
for match in matches:
    x, y = match['result']
    actual_x = x + left
    actual_y = y + top
    print(f"Actual position: ({actual_x}, {actual_y})")
122
```
123
124
### Multi-Template Detection
125
126
Detect multiple template variations or states.
127
128
```python
129
d = u2.connect()

# Candidate button states, checked in this order
button_templates = [
    "button_normal.png",
    "button_pressed.png",
    "button_disabled.png",
]

# Stop at the first state whose template matches; None when nothing matches
found_template = next(
    (name for name in button_templates if d.image.match(name, threshold=0.8)),
    None,
)
if found_template is not None:
    print(f"Found button state: {found_template}")

# Only the normal state is clicked; the disabled state is just reported
if found_template == "button_normal.png":
    d.image.click(found_template)
elif found_template == "button_disabled.png":
    print("Button is disabled")
149
```
150
151
### Dynamic Template Matching
152
153
Handle UI elements that change appearance or position.
154
155
```python
156
d = u2.connect()
157
158
def wait_for_any_template(templates, timeout=10):
    """Poll the screen until one of the given templates matches.

    Args:
        templates: Sequence of template images to look for. On every pass
            they are tried in order with a 0.8 threshold.
        timeout: Maximum time to keep polling, in seconds. The templates are
            always checked at least once, even when this is 0.

    Returns:
        The first template that matched, or None if nothing matched before
        the timeout expired.
    """
    import time

    # monotonic() cannot jump when the wall clock is adjusted, unlike time.time()
    deadline = time.monotonic() + timeout

    while True:
        for template in templates:
            if d.image.match(template, threshold=0.8):
                return template
        # Give up before sleeping so the call never overshoots by a full interval
        if time.monotonic() >= deadline:
            return None
        time.sleep(0.5)
169
170
# Wait for dialog to appear in any state
dialog_templates = [
    "success_dialog.png",
    "error_dialog.png",
    "warning_dialog.png",
]

found_dialog = wait_for_any_template(dialog_templates)
if found_dialog:
    print(f"Dialog appeared: {found_dialog}")

    # Handle different dialog types: the first keyword contained in the
    # template name decides which button gets clicked
    button_for_keyword = (
        ("success", "ok_button.png"),
        ("error", "retry_button.png"),
        ("warning", "continue_button.png"),
    )
    for keyword, button in button_for_keyword:
        if keyword in found_dialog:
            d.image.click(button)
            break
188
```
189
190
### Image-based Scrolling and Navigation
191
192
Use template matching for scrolling and navigation operations.
193
194
```python
195
d = u2.connect()
196
197
def scroll_to_element(template, max_scrolls=10):
    """Scroll until the template element is visible.

    Args:
        template: Template image to look for (matched with a 0.8 threshold).
        max_scrolls: Maximum number of swipes to perform.

    Returns:
        True as soon as the template matches, False if it is still not
        matched after ``max_scrolls`` swipes.
    """
    import time

    for _ in range(max_scrolls):
        if d.image.match(template, threshold=0.8):
            return True

        # Scroll down
        d.swipe(0.5, 0.7, 0.5, 0.3)
        # Pause before looking at the screen again
        time.sleep(1)

    # Check once more so that what the final swipe revealed is not ignored
    return bool(d.image.match(template, threshold=0.8))
208
209
# Scroll to find specific item
if not scroll_to_element("target_item.png"):
    print("Target item not found after scrolling")
else:
    d.image.click("target_item.png")
    print("Found and clicked target item")
215
216
# Navigate using visual landmarks
217
def navigate_with_landmarks():
    """Identify the current screen from the first landmark icon that matches.

    Returns:
        "home", "settings" or "profile" for the first icon found (checked in
        that order), or "unknown" when none of them matches.
    """
    # Look for navigation breadcrumbs, in priority order
    landmarks = (
        ("home_icon.png", "home"),
        ("settings_icon.png", "settings"),
        ("profile_icon.png", "profile"),
    )
    for icon, screen in landmarks:
        if d.image.match(icon):
            return screen
    return "unknown"
226
227
# Report which screen the landmarks point to
current_screen = navigate_with_landmarks()
print(f"Current screen: {current_screen}")
229
```
230
231
### Template Creation and Management
232
233
Capture template images directly from live UI elements, then verify that each saved template matches on screen.
234
235
```python
236
d = u2.connect()
237
238
def capture_element_template(selector, template_name):
    """Save a screenshot of one UI element as ``<template_name>.png``.

    Args:
        selector: Dict of selector keyword arguments, expanded into ``d(**selector)``.
        template_name: File name for the template, without the ``.png`` suffix.

    Returns:
        True if the element exists and its image was saved, False otherwise.
    """
    element = d(**selector)
    if not element.exists:
        return False

    # Take element screenshot and write it next to the script
    filename = f"{template_name}.png"
    element.screenshot().save(filename)
    print(f"Saved template: {filename}")
    return True
248
249
# Capture templates for later use
capture_element_template({"text": "Login"}, "login_button")
capture_element_template({"resourceId": "logo"}, "app_logo")
capture_element_template({"className": "android.widget.ProgressBar"}, "loading_spinner")

# Test captured templates by counting how often each one matches
templates_to_test = ["login_button.png", "app_logo.png", "loading_spinner.png"]
for template in templates_to_test:
    match_count = len(d.image.match(template, threshold=0.8))
    print(f"{template}: {match_count} matches found")
259
```
260
261
### Advanced Image Processing
262
263
Advanced image processing techniques for challenging scenarios.
264
265
```python
266
d = u2.connect()
267
268
# Handle different screen densities
269
def adaptive_threshold_click(template, base_threshold=0.8):
    """Click a template with a threshold adjusted to the screen density.

    The threshold is lowered by 0.1 below 240 DPI, raised by 0.1 above
    480 DPI, and left unchanged in between.

    Args:
        template: Template image to click.
        base_threshold: Threshold used for mid-range densities.

    Returns:
        Whatever ``d.image.click`` returns.
    """
    # NOTE(review): assumes d.info exposes 'displayDensity' - confirm; 320 is
    # used when the key is absent
    dpi = d.info.get('displayDensity', 320)

    # Adjust threshold based on DPI
    if dpi < 240:  # Low DPI
        threshold = base_threshold - 0.1
    elif dpi > 480:  # High DPI
        threshold = base_threshold + 0.1
    else:
        threshold = base_threshold

    # Match confidence thresholds are documented as 0.0-1.0, so keep the
    # adjusted value inside that range (e.g. base 0.95 on a high-DPI screen)
    threshold = min(1.0, max(0.0, threshold))

    return d.image.click(template, threshold=threshold)
283
284
# Multi-scale template matching
285
def multi_scale_match(template, scales=(0.8, 1.0, 1.2)):
    """Try template matching at several scales and keep the best match.

    Args:
        template: Path of the template image; opened with PIL.
        scales: Scale factors applied to the template's width and height.
            The default is a tuple so it cannot be mutated between calls.

    Returns:
        The match dictionary with the highest 'confidence' across all
        scales (matched with a 0.7 threshold), or None if nothing matched.
    """
    from PIL import Image

    best_match = None
    best_confidence = 0

    # The context manager closes the template file once all scales are done
    with Image.open(template) as template_img:
        for scale in scales:
            # Resize template
            new_size = (int(template_img.width * scale), int(template_img.height * scale))
            if new_size[0] < 1 or new_size[1] < 1:
                # A scale this small leaves no pixels to match against
                continue
            scaled_template = template_img.resize(new_size)

            # match() is documented to accept a PIL Image, so the scaled
            # template is passed directly and no temp_scaled_*.png files
            # are left behind in the working directory
            for match in d.image.match(scaled_template, threshold=0.7):
                if match['confidence'] > best_confidence:
                    best_confidence = match['confidence']
                    best_match = match

    return best_match
307
308
# Use adaptive and multi-scale matching
adaptive_threshold_click("button.png")

best_match = multi_scale_match("icon.png")
if best_match:
    # Click at the position reported by the best match
    position = best_match['result']
    d.click(position[0], position[1])
313
```
314
315
### Integration with Other Selection Methods
316
317
Combine image processing with traditional element selection for robust automation.
318
319
```python
320
d = u2.connect()
321
322
def robust_element_click(text=None, resource_id=None, template=None):
    """Click an element using the first selection method that finds it.

    The methods are tried in a fixed order: text selector, resource ID
    selector, then image template matching. Methods whose argument is not
    given are skipped.

    Args:
        text: Visible text of the element.
        resource_id: Resource ID of the element.
        template: Template image of the element (matched with a 0.8 threshold).

    Returns:
        "text_selector", "resource_id_selector" or "image_template" naming
        the method that was used, or None if no method found the element.
    """
    # Text selector comes first (fastest)
    if text:
        if d(text=text).exists:
            d(text=text).click()
            return "text_selector"

    # Resource ID selector is the second choice
    if resource_id:
        if d(resourceId=resource_id).exists:
            d(resourceId=resource_id).click()
            return "resource_id_selector"

    # Image template matching is the last resort
    if template:
        if d.image.match(template, threshold=0.8):
            d.image.click(template)
            return "image_template"

    return None
341
342
# Use robust selection
result = robust_element_click(
    text="Submit",
    resource_id="com.example:id/submit_btn",
    template="submit_button.png",
)

# `result` names the method that worked, or is None when all of them failed
if not result:
    print("Element not found using any method")
else:
    print(f"Clicked using: {result}")
353
```