A simple, easy-to-use, and stable Android automation library
Computer vision capabilities for image-based UI automation, template matching, and visual element detection. Useful when traditional element selection methods are insufficient.
Detect and interact with UI elements using template images.
class Device:
@cached_property
def image(self) -> ImageX:
"""Access image processing functionality"""
class ImageX:
def __init__(self, device: Device):
"""Initialize with device reference"""
def click(self, template, **kwargs):
"""
Click on element matching template image.
Parameters:
- template: Template image path or PIL Image
- threshold: Match confidence threshold (0.0-1.0)
- timeout: Wait timeout for element to appear
- method: Template matching method
"""
def match(self, template, **kwargs) -> List[Dict]:
"""
Find all matches for template image.
Parameters:
- template: Template image path or PIL Image
- threshold: Match confidence threshold
- method: Template matching method
Returns:
List of match dictionaries with position and confidence
"""
def wait(self, template, timeout: float = 20.0, **kwargs) -> bool:
"""
Wait for template image to appear.
Parameters:
- template: Template image path or PIL Image
- timeout: Maximum wait time
- threshold: Match confidence threshold
Returns:
bool: True if template found
"""

Usage examples:
d = u2.connect()
# Click on button using template image
d.image.click("button_template.png", threshold=0.8)
# Find all matches for an icon
matches = d.image.match("icon.png", threshold=0.9)
for match in matches:
print(f"Found at: {match['result']}, confidence: {match['confidence']}")
# Wait for loading spinner to appear
if d.image.wait("loading_spinner.png", timeout=10):
print("Loading started")
# Wait for loading to finish (template disappears)
while d.image.match("loading_spinner.png", threshold=0.8):
time.sleep(0.5)
print("Loading finished")

Different algorithms for template matching with various accuracy and performance characteristics.
d = u2.connect()
# Template matching methods
d.image.click("button.png", method="cv2.TM_CCOEFF_NORMED") # Default, good balance
d.image.click("icon.png", method="cv2.TM_SQDIFF_NORMED") # Good for exact matches
d.image.click("logo.png", method="cv2.TM_CCORR_NORMED") # Fast but less accurate
# Adjust threshold based on method
d.image.click("element.png", method="cv2.TM_CCOEFF_NORMED", threshold=0.9)
d.image.click("element.png", method="cv2.TM_SQDIFF_NORMED", threshold=0.1) # Lower is better for SQDIFF

Process specific screen regions for improved performance and accuracy.
d = u2.connect()
# Take screenshot for manual processing
screenshot = d.screenshot()
# Define region of interest
region = (100, 100, 500, 400) # (left, top, right, bottom)
cropped = screenshot.crop(region)
cropped.save("region.png")
# Process cropped region
matches = d.image.match("target.png", region=region)
# Adjust coordinates for region offset
for match in matches:
x, y = match['result']
actual_x = x + region[0]
actual_y = y + region[1]
print(f"Actual position: ({actual_x}, {actual_y})")

Detect multiple template variations or states.
d = u2.connect()
# Check for multiple button states
button_templates = [
"button_normal.png",
"button_pressed.png",
"button_disabled.png"
]
found_template = None
for template in button_templates:
if d.image.match(template, threshold=0.8):
found_template = template
print(f"Found button state: {template}")
break
if found_template == "button_normal.png":
d.image.click(found_template)
elif found_template == "button_disabled.png":
print("Button is disabled")

Handle UI elements that change appearance or position.
d = u2.connect()
def wait_for_any_template(templates, timeout=10):
    """Wait for any of the provided templates to appear.

    Parameters:
    - templates: Iterable of template images (path or PIL Image)
    - timeout: Maximum wait time in seconds

    Returns:
    The first template that produced a match, or None if the timeout
    elapsed without any template matching
    """
    import time

    # Materialize once so a generator argument can be scanned on every pass.
    templates = list(templates)
    # A monotonic clock keeps the deadline immune to wall-clock adjustments.
    deadline = time.monotonic() + timeout
    while True:
        # Always scan at least once, even when timeout is zero.
        for template in templates:
            if d.image.match(template, threshold=0.8):
                return template
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            return None
        # Never sleep past the deadline.
        time.sleep(min(0.5, remaining))
# Wait for dialog to appear in any state
dialog_templates = [
"success_dialog.png",
"error_dialog.png",
"warning_dialog.png"
]
found_dialog = wait_for_any_template(dialog_templates)
if found_dialog:
print(f"Dialog appeared: {found_dialog}")
# Handle different dialog types
if "success" in found_dialog:
d.image.click("ok_button.png")
elif "error" in found_dialog:
d.image.click("retry_button.png")
elif "warning" in found_dialog:
d.image.click("continue_button.png")

Use template matching for scrolling and navigation operations.
d = u2.connect()
def scroll_to_element(template, max_scrolls=10):
    """Scroll down until the template element is visible.

    Parameters:
    - template: Template image path or PIL Image
    - max_scrolls: Maximum number of downward swipes to perform

    Returns:
    bool: True if the template was matched, False otherwise
    """
    # Imported locally so the helper does not rely on a module-level import.
    import time

    # Check once before the first swipe and once after every swipe, so that
    # content revealed by the final swipe is still examined.
    for attempt in range(max_scrolls + 1):
        if d.image.match(template, threshold=0.8):
            return True
        if attempt == max_scrolls:
            break
        # Scroll down
        d.swipe(0.5, 0.7, 0.5, 0.3)
        # Pause before re-checking (presumably to let the scroll settle)
        time.sleep(1)
    return False
# Scroll to find specific item
if scroll_to_element("target_item.png"):
d.image.click("target_item.png")
print("Found and clicked target item")
else:
print("Target item not found after scrolling")
# Navigate using visual landmarks
def navigate_with_landmarks():
    """Identify the current screen from the first visible landmark icon.

    Returns:
    str: "home", "settings" or "profile" for the first icon that matches,
    otherwise "unknown"
    """
    # Ordered (icon template, screen name) pairs; earlier entries win.
    landmarks = (
        ("home_icon.png", "home"),
        ("settings_icon.png", "settings"),
        ("profile_icon.png", "profile"),
    )
    for icon, screen_name in landmarks:
        if d.image.match(icon):
            return screen_name
    return "unknown"
current_screen = navigate_with_landmarks()
print(f"Current screen: {current_screen}")

Best practices for creating and managing template images.
d = u2.connect()
def capture_element_template(selector, template_name):
    """Save a screenshot of a UI element as "<template_name>.png".

    Parameters:
    - selector: Dict of selector keyword arguments passed to d(...)
    - template_name: Output file name without the ".png" extension

    Returns:
    bool: True if the element exists and its image was saved, else False
    """
    target = d(**selector)
    if not target.exists:
        return False

    # Screenshot just the element and write it out as the template file
    snapshot = target.screenshot()
    snapshot.save(f"{template_name}.png")
    print(f"Saved template: {template_name}.png")
    return True
# Capture templates for later use
capture_element_template({"text": "Login"}, "login_button")
capture_element_template({"resourceId": "logo"}, "app_logo")
capture_element_template({"className": "android.widget.ProgressBar"}, "loading_spinner")
# Test captured templates
templates_to_test = ["login_button.png", "app_logo.png", "loading_spinner.png"]
for template in templates_to_test:
matches = d.image.match(template, threshold=0.8)
print(f"{template}: {len(matches)} matches found")

Advanced image processing techniques for challenging scenarios.
d = u2.connect()
# Handle different screen densities
def adaptive_threshold_click(template, base_threshold=0.8):
    """Click a template using a threshold adjusted for screen density.

    Parameters:
    - template: Template image path or PIL Image
    - base_threshold: Threshold used when the density is between 240 and
      480 inclusive; lowered by 0.1 below 240 and raised by 0.1 above 480

    Returns:
    Whatever d.image.click() returns for the adjusted threshold
    """
    # NOTE(review): confirm that d.info actually exposes 'displayDensity';
    # when the key is absent the 320 fallback always selects base_threshold.
    dpi = d.info.get('displayDensity', 320)
    if dpi < 240:  # Low DPI
        adjustment = -0.1
    elif dpi > 480:  # High DPI
        adjustment = 0.1
    else:
        adjustment = 0.0
    # Clamp to the valid 0.0-1.0 confidence range so a high base threshold
    # cannot be pushed to a value that no match could ever satisfy.
    threshold = min(1.0, max(0.0, base_threshold + adjustment))
    return d.image.click(template, threshold=threshold)
# Multi-scale template matching
def multi_scale_match(template, scales=(0.8, 1.0, 1.2)):
    """Try template matching at several scales and return the best match.

    Parameters:
    - template: Template image path or PIL Image
    - scales: Iterable of scale factors applied to the template size

    Returns:
    The match dictionary with the highest confidence across all scales,
    or None if no scale produced a match
    """
    from PIL import Image

    if isinstance(template, Image.Image):
        template_img = template
    else:
        # Copy the pixel data so the file handle can be closed immediately.
        with Image.open(template) as opened:
            template_img = opened.copy()

    best_match = None
    best_confidence = 0
    for scale in scales:
        # Resize template; never let a dimension collapse to zero pixels.
        new_size = (
            max(1, int(template_img.width * scale)),
            max(1, int(template_img.height * scale)),
        )
        scaled_template = template_img.resize(new_size)
        # Hand the PIL image straight to the matcher instead of writing
        # temp_scaled_*.png files that would be left in the working directory.
        matches = d.image.match(scaled_template, threshold=0.7)
        for match in matches:
            if match['confidence'] > best_confidence:
                best_confidence = match['confidence']
                best_match = match
    return best_match
# Use adaptive and multi-scale matching
adaptive_threshold_click("button.png")
best_match = multi_scale_match("icon.png")
if best_match:
d.click(best_match['result'][0], best_match['result'][1])

Combine image processing with traditional element selection for robust automation.
d = u2.connect()
def robust_element_click(text=None, resource_id=None, template=None):
    """Try multiple selection methods in order of preference.

    Parameters:
    - text: Visible text of the element (tried first)
    - resource_id: Resource ID of the element (tried second)
    - template: Template image path or PIL Image (tried last)

    Returns:
    "text_selector", "resource_id_selector" or "image_template" naming the
    method that performed the click, or None if nothing was found
    """
    # Shared by the existence check and the click so both use one criterion.
    match_threshold = 0.8

    # Try text selector first (fastest)
    if text:
        by_text = d(text=text)
        if by_text.exists:
            by_text.click()
            return "text_selector"

    # Try resource ID selector
    if resource_id:
        by_id = d(resourceId=resource_id)
        if by_id.exists:
            by_id.click()
            return "resource_id_selector"

    # Fall back to image template matching
    if template and d.image.match(template, threshold=match_threshold):
        d.image.click(template, threshold=match_threshold)
        return "image_template"

    return None
# Use robust selection
result = robust_element_click(
text="Submit",
resource_id="com.example:id/submit_btn",
template="submit_button.png"
)
if result:
print(f"Clicked using: {result}")
else:
print("Element not found using any method")

Install with Tessl CLI
npx tessl i tessl/pypi-uiautomator2@3.2.1