CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-uiautomator2

A simple, easy-to-use, and stable Android automation library

Overview
Eval results
Files

docs/image-processing.md

Image Processing and Template Matching

Computer vision capabilities for image-based UI automation, template matching, and visual element detection. Useful when traditional element selection methods are insufficient.

Capabilities

Image-based Element Detection

Detect and interact with UI elements using template images.

class Device:
    @cached_property
    def image(self) -> ImageX:
        """
        Access image processing functionality.

        Returns:
        ImageX: Image helper offering click(), match() and wait().
        Being a cached_property, it is computed on first access and the
        same object is reused on later accesses.
        """

class ImageX:
    def __init__(self, device: Device):
        """
        Initialize with device reference.

        Parameters:
        - device: Device this helper belongs to (normally reached through
          the Device.image property rather than constructed directly)
        """

    def click(self, template, **kwargs):
        """
        Click on element matching template image.

        Parameters:
        - template: Template image path or PIL Image
        - **kwargs: Optional keyword arguments:
            - threshold: Match confidence threshold (0.0-1.0)
            - timeout: Wait timeout for element to appear
            - method: Template matching method, given by name as a string
              (e.g. "cv2.TM_CCOEFF_NORMED")
        """

    def match(self, template, **kwargs) -> List[Dict]:
        """
        Find all matches for template image.

        Parameters:
        - template: Template image path or PIL Image
        - **kwargs: Optional keyword arguments:
            - threshold: Match confidence threshold
            - method: Template matching method
            - region: (left, top, right, bottom) area to search, as used
              in the screen-region example

        Returns:
        List of match dictionaries with position and confidence. The
        examples read the position from match['result'] (unpacked as
        x, y) and the score from match['confidence'], and test the list
        for truthiness to decide whether the template is present.
        """

    def wait(self, template, timeout: float = 20.0, **kwargs) -> bool:
        """
        Wait for template image to appear.

        Parameters:
        - template: Template image path or PIL Image
        - timeout: Maximum wait time (presumably seconds - confirm
          against the implementation)
        - **kwargs: Optional keyword arguments:
            - threshold: Match confidence threshold

        Returns:
        bool: True if template found
        """

Usage examples:

import time

d = u2.connect()

# Click on button using template image
d.image.click("button_template.png", threshold=0.8)

# Find all matches for an icon
matches = d.image.match("icon.png", threshold=0.9)
for match in matches:
    print(f"Found at: {match['result']}, confidence: {match['confidence']}")

# Wait for loading spinner to appear
if d.image.wait("loading_spinner.png", timeout=10):
    print("Loading started")

# Wait for loading to finish (template disappears). The wait is bounded so
# that a spinner which never goes away cannot hang the script forever.
deadline = time.time() + 30
while time.time() < deadline:
    if not d.image.match("loading_spinner.png", threshold=0.8):
        print("Loading finished")
        break
    time.sleep(0.5)
else:
    # Loop ran out of time without the spinner disappearing
    print("Loading did not finish within 30 seconds")

Template Matching Methods

Different algorithms for template matching with various accuracy and performance characteristics.

d = u2.connect()

# One click per matching algorithm; the method is passed by its OpenCV name
for template, method in (
    ("button.png", "cv2.TM_CCOEFF_NORMED"),  # Default, good balance
    ("icon.png", "cv2.TM_SQDIFF_NORMED"),    # Good for exact matches
    ("logo.png", "cv2.TM_CCORR_NORMED"),     # Fast but less accurate
):
    d.image.click(template, method=method)

# The threshold has to suit the method: for SQDIFF a lower score is better
for method, threshold in (
    ("cv2.TM_CCOEFF_NORMED", 0.9),
    ("cv2.TM_SQDIFF_NORMED", 0.1),
):
    d.image.click("element.png", method=method, threshold=threshold)

Screen Region Processing

Process specific screen regions for improved performance and accuracy.

d = u2.connect()

# Grab the current screen for manual processing
screenshot = d.screenshot()

# Region of interest, given as (left, top, right, bottom)
region = (100, 100, 500, 400)

# Keep a copy of just that region on disk
cropped = screenshot.crop(region)
cropped.save("region.png")

# Search for the target, restricted to the region
matches = d.image.match("target.png", region=region)

# Shift each hit by the region's top-left corner to get screen coordinates
for match in matches:
    x, y = match['result']
    actual_x, actual_y = x + region[0], y + region[1]
    print(f"Actual position: ({actual_x}, {actual_y})")

Multi-Template Detection

Detect multiple template variations or states.

d = u2.connect()

# Use one threshold for both the presence check and the click, so the click
# accepts exactly the match that the check just found
MATCH_THRESHOLD = 0.8

# Check for multiple button states
button_templates = [
    "button_normal.png",
    "button_pressed.png",
    "button_disabled.png",
]

# Remember the first template that is currently visible (None if none is)
found_template = None
for template in button_templates:
    if d.image.match(template, threshold=MATCH_THRESHOLD):
        found_template = template
        print(f"Found button state: {template}")
        break

if found_template == "button_normal.png":
    d.image.click(found_template, threshold=MATCH_THRESHOLD)
elif found_template == "button_disabled.png":
    print("Button is disabled")

Dynamic Template Matching

Handle UI elements that change appearance or position.

d = u2.connect()

def wait_for_any_template(templates, timeout=10):
    """
    Poll the screen until one of the templates is visible.

    Parameters:
    - templates: Iterable of template images, checked in order on each pass
    - timeout: How long to keep polling, in seconds

    Returns:
    The first template that matched, or None if the timeout elapsed
    """
    import time

    give_up_at = time.time() + timeout
    while time.time() < give_up_at:
        for candidate in templates:
            hits = d.image.match(candidate, threshold=0.8)
            if hits:
                return candidate
        # Nothing visible on this pass; pause before looking again
        time.sleep(0.5)
    return None

# Wait for dialog to appear in any state
dialog_templates = [
    "success_dialog.png",
    "error_dialog.png",
    "warning_dialog.png",
]

found_dialog = wait_for_any_template(dialog_templates)
if found_dialog:
    print(f"Dialog appeared: {found_dialog}")

    # Each dialog type is dismissed with its own button; only the first
    # keyword found in the template name is acted upon
    for keyword, button in (
        ("success", "ok_button.png"),
        ("error", "retry_button.png"),
        ("warning", "continue_button.png"),
    ):
        if keyword in found_dialog:
            d.image.click(button)
            break

Image-based Scrolling and Navigation

Use template matching for scrolling and navigation operations.

d = u2.connect()

def scroll_to_element(template, max_scrolls=10):
    """
    Scroll until template element is visible.

    Parameters:
    - template: Template image to look for
    - max_scrolls: Maximum number of downward swipes to perform

    Returns:
    bool: True if the template was matched, False if it was still not
    visible after max_scrolls swipes
    """
    import time

    for _ in range(max_scrolls):
        if d.image.match(template, threshold=0.8):
            return True

        # Scroll down
        d.swipe(0.5, 0.7, 0.5, 0.3)
        time.sleep(1)

    # The last swipe may have brought the element into view, so look once
    # more before giving up
    return bool(d.image.match(template, threshold=0.8))

# Scroll to find specific item
if scroll_to_element("target_item.png"):
    # Click with the same 0.8 threshold that scroll_to_element matched with,
    # so the click accepts the match that was just found
    d.image.click("target_item.png", threshold=0.8)
    print("Found and clicked target item")
else:
    print("Target item not found after scrolling")

# Navigate using visual landmarks
def navigate_with_landmarks():
    """
    Identify the current screen from the first landmark icon that is visible.

    Returns:
    "home", "settings" or "profile" for the matching icon, else "unknown"
    """
    # Checked in order; the first visible icon decides the answer
    landmarks = (
        ("home_icon.png", "home"),
        ("settings_icon.png", "settings"),
        ("profile_icon.png", "profile"),
    )
    for icon, screen_name in landmarks:
        if d.image.match(icon):
            return screen_name
    return "unknown"

current_screen = navigate_with_landmarks()
print(f"Current screen: {current_screen}")

Template Creation and Management

Best practices for creating and managing template images.

d = u2.connect()

def capture_element_template(selector, template_name):
    """
    Save a screenshot of a UI element as a template image.

    Parameters:
    - selector: Dict of selector keyword arguments passed to d(...)
    - template_name: Output file name without the ".png" extension

    Returns:
    bool: True if the element existed and was saved, False otherwise
    """
    element = d(**selector)
    if not element.exists:
        return False

    # Screenshot just the element and write it next to the script
    filename = f"{template_name}.png"
    element.screenshot().save(filename)
    print(f"Saved template: {filename}")
    return True

# Capture templates for later use
for selector, name in (
    ({"text": "Login"}, "login_button"),
    ({"resourceId": "logo"}, "app_logo"),
    ({"className": "android.widget.ProgressBar"}, "loading_spinner"),
):
    capture_element_template(selector, name)

# Test captured templates by counting how often each one matches the screen
templates_to_test = ["login_button.png", "app_logo.png", "loading_spinner.png"]
for template in templates_to_test:
    hit_count = len(d.image.match(template, threshold=0.8))
    print(f"{template}: {hit_count} matches found")

Advanced Image Processing

Advanced image processing techniques for challenging scenarios.

d = u2.connect()

# Handle different screen densities
def adaptive_threshold_click(template, base_threshold=0.8):
    """
    Click a template using a threshold adjusted for screen density.

    Parameters:
    - template: Template image to click
    - base_threshold: Threshold used for mid-range densities; lowered by 0.1
      below 240 DPI and raised by 0.1 above 480 DPI

    Returns:
    Whatever d.image.click returns for the adjusted threshold
    """
    # Falls back to 320 when the device info has no 'displayDensity' entry
    dpi = d.info.get('displayDensity', 320)

    # Adjust threshold based on DPI
    if dpi < 240:  # Low DPI
        adjustment = -0.1
    elif dpi > 480:  # High DPI
        adjustment = 0.1
    else:
        adjustment = 0.0

    # Keep the result inside the valid 0.0-1.0 confidence range; a value
    # above 1.0 could never be reached by any match
    threshold = min(1.0, max(0.0, base_threshold + adjustment))

    return d.image.click(template, threshold=threshold)

# Multi-scale template matching
def multi_scale_match(template, scales=(0.8, 1.0, 1.2)):
    """
    Try template matching at different scales.

    Parameters:
    - template: Path of the template image file
    - scales: Scale factors applied to the template's width and height

    Returns:
    The match dictionary with the highest confidence across all scales,
    or None if nothing matched
    """
    from PIL import Image

    best_match = None
    best_confidence = 0

    # The context manager closes the template file once all scales are done
    with Image.open(template) as template_img:
        for scale in scales:
            # Resize template
            new_size = (int(template_img.width * scale), int(template_img.height * scale))
            if min(new_size) < 1:
                # A scale this small would produce an empty image; skip it
                continue
            scaled_template = template_img.resize(new_size)

            # match() accepts a PIL Image, so the scaled template is passed
            # directly instead of being written to a temporary file
            for match in d.image.match(scaled_template, threshold=0.7):
                if match['confidence'] > best_confidence:
                    best_confidence = match['confidence']
                    best_match = match

    return best_match

# Use adaptive and multi-scale matching
adaptive_threshold_click("button.png")

best_match = multi_scale_match("icon.png")
if best_match:
    # Click at the position reported for the best-scoring match
    position = best_match['result']
    d.click(position[0], position[1])

Integration with Other Selection Methods

Combine image processing with traditional element selection for robust automation.

d = u2.connect()

def robust_element_click(text=None, resource_id=None, template=None):
    """
    Try multiple selection methods in order of preference.

    Parameters:
    - text: Visible text of the element, tried first
    - resource_id: Resource ID of the element, tried second
    - template: Template image of the element, tried last

    Returns:
    "text_selector", "resource_id_selector" or "image_template" naming the
    method that clicked the element, or None if no method found it
    """
    # Try text selector first (fastest)
    if text:
        by_text = d(text=text)
        if by_text.exists:
            by_text.click()
            return "text_selector"

    # Try resource ID selector
    if resource_id:
        by_id = d(resourceId=resource_id)
        if by_id.exists:
            by_id.click()
            return "resource_id_selector"

    # Fall back to image template matching. The click uses the same
    # threshold as the presence check so it accepts the match just found.
    if template and d.image.match(template, threshold=0.8):
        d.image.click(template, threshold=0.8)
        return "image_template"

    return None

# Use robust selection: each locator is tried in turn until one succeeds
result = robust_element_click(
    text="Submit",
    resource_id="com.example:id/submit_btn",
    template="submit_button.png",
)

# result names the method that worked, or is None when all of them failed
print(f"Clicked using: {result}" if result else "Element not found using any method")

Install with Tessl CLI

npx tessl i tessl/pypi-uiautomator2@3.2.1

docs

app-management.md

device-management.md

image-processing.md

index.md

screen-input.md

ui-interaction.md

watchers-automation.md

xpath-selection.md

tile.json