or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

app-management.md · device-management.md · image-processing.md · index.md · screen-input.md · ui-interaction.md · watchers-automation.md · xpath-selection.md

docs/image-processing.md

0

# Image Processing and Template Matching

1

2

This module provides computer-vision capabilities for image-based UI automation: template matching and visual element detection. It is useful when traditional element selection methods (text, resource ID, XPath) are insufficient.

3

4

## Capabilities

5

6

### Image-based Element Detection

7

8

Detect and interact with UI elements using template images.

9

10

```python { .api }

11

class Device:
    @cached_property
    def image(self) -> ImageX:
        """Access image processing functionality.

        Exposed as a ``cached_property`` annotated to return ``ImageX``, so
        it is used as an attribute (``d.image``) rather than called.
        """

15

16

class ImageX:
    """Image-based (template matching) operations for a device."""

    def __init__(self, device: Device):
        """Initialize with device reference"""

    def click(self, template, **kwargs):
        """
        Click on element matching template image.

        Parameters:
        - template: Template image path or PIL Image
        - threshold: Match confidence threshold (0.0-1.0)
        - timeout: Wait timeout for element to appear
        - method: Template matching method

        threshold, timeout and method are not named in the signature;
        they are supplied as keyword arguments through **kwargs.
        """

    def match(self, template, **kwargs) -> List[Dict]:
        """
        Find all matches for template image.

        Parameters:
        - template: Template image path or PIL Image
        - threshold: Match confidence threshold
        - method: Template matching method

        threshold and method are supplied as keyword arguments
        through **kwargs.

        Returns:
        List of match dictionaries with position and confidence
        """

    def wait(self, template, timeout: float = 20.0, **kwargs) -> bool:
        """
        Wait for template image to appear.

        Parameters:
        - template: Template image path or PIL Image
        - timeout: Maximum wait time (defaults to 20.0; presumably
          seconds -- TODO confirm)
        - threshold: Match confidence threshold (supplied through **kwargs)

        Returns:
        bool: True if template found
        """

56

```

57

58

Usage examples:

59

60

```python

61

d = u2.connect()

62

63

# Click on button using template image

64

d.image.click("button_template.png", threshold=0.8)

65

66

# Find all matches for an icon

67

matches = d.image.match("icon.png", threshold=0.9)

68

for match in matches:

69

print(f"Found at: {match['result']}, confidence: {match['confidence']}")

70

71

# Wait for loading spinner to appear

72

if d.image.wait("loading_spinner.png", timeout=10):

73

print("Loading started")

74

75

import time

# Wait for loading to finish (template disappears).
# Bound the wait so a spinner that never goes away cannot hang the script.
deadline = time.monotonic() + 30
while d.image.match("loading_spinner.png", threshold=0.8):
    if time.monotonic() >= deadline:
        raise TimeoutError("loading spinner still visible after 30 seconds")
    time.sleep(0.5)
print("Loading finished")

79

```

80

81

### Template Matching Methods

82

83

Different algorithms for template matching with various accuracy and performance characteristics.

84

85

```python

86

d = u2.connect()

87

88

# Template matching methods

89

d.image.click("button.png", method="cv2.TM_CCOEFF_NORMED") # Default, good balance

90

d.image.click("icon.png", method="cv2.TM_SQDIFF_NORMED") # Good for exact matches

91

d.image.click("logo.png", method="cv2.TM_CCORR_NORMED") # Fast but less accurate

92

93

# Adjust threshold based on method

94

d.image.click("element.png", method="cv2.TM_CCOEFF_NORMED", threshold=0.9)

95

d.image.click("element.png", method="cv2.TM_SQDIFF_NORMED", threshold=0.1) # Lower is better for SQDIFF

96

```

97

98

### Screen Region Processing

99

100

Process specific screen regions for improved performance and accuracy.

101

102

```python

103

d = u2.connect()

104

105

# Take screenshot for manual processing

106

screenshot = d.screenshot()

107

108

# Define region of interest

109

region = (100, 100, 500, 400) # (left, top, right, bottom)

110

cropped = screenshot.crop(region)

111

cropped.save("region.png")

112

113

# Process cropped region

114

matches = d.image.match("target.png", region=region)

115

116

# Adjust coordinates for region offset

117

for match in matches:

118

x, y = match['result']

119

actual_x = x + region[0]

120

actual_y = y + region[1]

121

print(f"Actual position: ({actual_x}, {actual_y})")

122

```

123

124

### Multi-Template Detection

125

126

Detect multiple template variations or states.

127

128

```python

129

d = u2.connect()

130

131

# Check for multiple button states

132

button_templates = [

133

"button_normal.png",

134

"button_pressed.png",

135

"button_disabled.png"

136

]

137

138

found_template = None

139

for template in button_templates:

140

if d.image.match(template, threshold=0.8):

141

found_template = template

142

print(f"Found button state: {template}")

143

break

144

145

if found_template == "button_normal.png":

146

d.image.click(found_template)

147

elif found_template == "button_disabled.png":

148

print("Button is disabled")

149

```

150

151

### Dynamic Template Matching

152

153

Handle UI elements that change appearance or position.

154

155

```python

156

d = u2.connect()

157

158

def wait_for_any_template(templates, timeout=10, threshold=0.8, interval=0.5):
    """Wait for any of the provided templates to appear.

    Parameters:
    - templates: Iterable of template images, tried in order on every poll
    - timeout: Maximum time to keep polling, in seconds
    - threshold: Match confidence threshold passed to ``d.image.match``
    - interval: Pause between polls, in seconds

    Returns:
    The first template for which ``d.image.match`` returned a truthy
    result, or None if the timeout elapsed (or was not positive).
    """
    import time

    # A monotonic clock keeps the deadline correct even if the system
    # wall clock is adjusted while waiting.
    deadline = time.monotonic() + timeout

    while time.monotonic() < deadline:
        for template in templates:
            if d.image.match(template, threshold=threshold):
                return template
        # Never sleep past the deadline.
        remaining = deadline - time.monotonic()
        if remaining > 0:
            time.sleep(min(interval, remaining))
    return None

169

170

# Wait for dialog to appear in any state

171

dialog_templates = [

172

"success_dialog.png",

173

"error_dialog.png",

174

"warning_dialog.png"

175

]

176

177

found_dialog = wait_for_any_template(dialog_templates)

178

if found_dialog:

179

print(f"Dialog appeared: {found_dialog}")

180

181

# Handle different dialog types

182

if "success" in found_dialog:

183

d.image.click("ok_button.png")

184

elif "error" in found_dialog:

185

d.image.click("retry_button.png")

186

elif "warning" in found_dialog:

187

d.image.click("continue_button.png")

188

```

189

190

### Image-based Scrolling and Navigation

191

192

Use template matching for scrolling and navigation operations.

193

194

```python

195

d = u2.connect()

196

197

def scroll_to_element(template, max_scrolls=10, threshold=0.8):
    """Scroll until template element is visible.

    Parameters:
    - template: Template image to look for
    - max_scrolls: Maximum number of downward swipes to perform
    - threshold: Match confidence threshold passed to ``d.image.match``

    Returns:
    bool: True if the template was matched before or after any of the
    swipes, False otherwise.
    """
    import time

    for _ in range(max_scrolls):
        if d.image.match(template, threshold=threshold):
            return True

        # Scroll down
        d.swipe(0.5, 0.7, 0.5, 0.3)
        time.sleep(1)  # give the UI time to settle before matching again

    # The final swipe may have brought the element into view, so check once
    # more instead of reporting failure without looking at the last screen.
    return bool(d.image.match(template, threshold=threshold))

208

209

# Scroll to find specific item

210

if scroll_to_element("target_item.png"):

211

d.image.click("target_item.png")

212

print("Found and clicked target item")

213

else:

214

print("Target item not found after scrolling")

215

216

# Navigate using visual landmarks

217

def navigate_with_landmarks():
    """Identify the current screen from the first landmark icon that matches.

    Icons are tried in order (home, settings, profile); the name of the
    first one matched is returned, or "unknown" when none match.
    """
    # Ordered (template, screen name) pairs; order decides precedence.
    landmarks = (
        ("home_icon.png", "home"),
        ("settings_icon.png", "settings"),
        ("profile_icon.png", "profile"),
    )
    for icon, screen in landmarks:
        if d.image.match(icon):
            return screen
    return "unknown"

226

227

current_screen = navigate_with_landmarks()

228

print(f"Current screen: {current_screen}")

229

```

230

231

### Template Creation and Management

232

233

Best practices for creating and managing template images.

234

235

```python

236

d = u2.connect()

237

238

def capture_element_template(selector, template_name):
    """Save a screenshot of the selected UI element as ``<template_name>.png``.

    Parameters:
    - selector: Dict of selector keyword arguments, expanded into ``d(**selector)``
    - template_name: Output file name without the ``.png`` extension

    Returns:
    bool: True if the element exists and its image was saved, else False
    """
    target = d(**selector)
    if not target.exists:
        return False

    # Take element screenshot and write it next to the script
    snapshot = target.screenshot()
    snapshot.save(f"{template_name}.png")
    print(f"Saved template: {template_name}.png")
    return True

248

249

# Capture templates for later use

250

capture_element_template({"text": "Login"}, "login_button")

251

capture_element_template({"resourceId": "logo"}, "app_logo")

252

capture_element_template({"className": "android.widget.ProgressBar"}, "loading_spinner")

253

254

# Test captured templates

255

templates_to_test = ["login_button.png", "app_logo.png", "loading_spinner.png"]

256

for template in templates_to_test:

257

matches = d.image.match(template, threshold=0.8)

258

print(f"{template}: {len(matches)} matches found")

259

```

260

261

### Advanced Image Processing

262

263

Advanced image processing techniques for challenging scenarios.

264

265

```python

266

d = u2.connect()

267

268

# Handle different screen densities

269

def adaptive_threshold_click(template, base_threshold=0.8):
    """Adaptively adjust threshold based on screen density, then click.

    Parameters:
    - template: Template image passed to ``d.image.click``
    - base_threshold: Threshold used for mid-range densities; lowered by
      0.1 below 240 DPI and raised by 0.1 above 480 DPI

    Returns:
    Whatever ``d.image.click`` returns.
    """
    # Fall back to 320 when the device info carries no 'displayDensity' key.
    dpi = d.info.get('displayDensity', 320)

    # Adjust threshold based on DPI
    if dpi < 240:  # Low DPI
        threshold = base_threshold - 0.1
    elif dpi > 480:  # High DPI
        threshold = base_threshold + 0.1
    else:
        threshold = base_threshold

    # The threshold is documented as a 0.0-1.0 confidence; the +/-0.1
    # adjustment must not push it outside that range.
    threshold = min(1.0, max(0.0, threshold))

    return d.image.click(template, threshold=threshold)

283

284

# Multi-scale template matching

285

def multi_scale_match(template, scales=(0.8, 1.0, 1.2), threshold=0.7):
    """Try template matching at different scales.

    Parameters:
    - template: Path of the template image file
    - scales: Scale factors applied to the template's width and height
    - threshold: Match confidence threshold passed to ``d.image.match``

    Returns:
    The match dictionary with the highest 'confidence' across all scales,
    or None if nothing matched.
    """
    from PIL import Image

    best_match = None
    best_confidence = 0

    # The context manager closes the template file once all scales are tried.
    with Image.open(template) as template_img:
        for scale in scales:
            # Resize template; keep each side at least 1 px so very small
            # scale factors cannot produce an empty image.
            new_size = (
                max(1, int(template_img.width * scale)),
                max(1, int(template_img.height * scale)),
            )
            scaled_template = template_img.resize(new_size)

            # match() accepts a PIL Image as well as a path, so the scaled
            # copy is passed directly instead of being written to
            # temp_scaled_*.png files that were never cleaned up.
            matches = d.image.match(scaled_template, threshold=threshold)
            for match in matches:
                if match['confidence'] > best_confidence:
                    best_confidence = match['confidence']
                    best_match = match

    return best_match

307

308

# Use adaptive and multi-scale matching

309

adaptive_threshold_click("button.png")

310

best_match = multi_scale_match("icon.png")

311

if best_match:

312

d.click(best_match['result'][0], best_match['result'][1])

313

```

314

315

### Integration with Other Selection Methods

316

317

Combine image processing with traditional element selection for robust automation.

318

319

```python

320

d = u2.connect()

321

322

def robust_element_click(text=None, resource_id=None, template=None):
    """Click an element using the first selection strategy that finds it.

    Strategies are attempted in this order, skipping any whose argument
    was not supplied: text selector, resource ID selector, image template.

    Returns:
    "text_selector", "resource_id_selector" or "image_template" naming the
    strategy that clicked, or None if none of them found the element.
    """
    # 1. Text selector (fastest)
    if text:
        if d(text=text).exists:
            d(text=text).click()
            return "text_selector"

    # 2. Resource ID selector
    if resource_id:
        if d(resourceId=resource_id).exists:
            d(resourceId=resource_id).click()
            return "resource_id_selector"

    # 3. Image template matching as the last resort
    if template:
        if d.image.match(template, threshold=0.8):
            d.image.click(template)
            return "image_template"

    return None

341

342

# Use robust selection

343

result = robust_element_click(

344

text="Submit",

345

resource_id="com.example:id/submit_btn",

346

template="submit_button.png"

347

)

348

349

if result:

350

print(f"Clicked using: {result}")

351

else:

352

print("Element not found using any method")

353

```