Comprehensive Python toolkit for Android application reverse engineering and security analysis.
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Java-like source code generation from Android bytecode using the DAD (Dex to Android Decompiler) engine. The decompiler converts Dalvik bytecode back into readable Java-like source code with proper control flow structures.
The main decompiler implementation that converts DEX bytecode to readable Java-like source code.
class DecompilerDAD:
    """Main decompiler implementation that converts DEX bytecode to readable Java-like source code."""

    def __init__(self, vm, vmx):
        """
        Initialize DAD decompiler.

        Parameters:
        - vm: DEX object containing bytecode
        - vmx: Analysis object with cross-references
        """

    def get_source_class(self, _class) -> str:
        """
        Decompile entire class to source code.

        Parameters:
        - _class: ClassDefItem to decompile

        Returns:
        Complete Java-like source code for class
        """

    def get_source_method(self, method) -> str:
        """
        Decompile single method to source code.

        Parameters:
        - method: EncodedMethod to decompile

        Returns:
        Java-like source code for method
        """

    def display_source(self, method) -> None:
        """
        Print decompiled method source to stdout.

        Parameters:
        - method: EncodedMethod to display
        """

    def get_ast(self, method):
        """
        Get Abstract Syntax Tree for method.

        Parameters:
        - method: EncodedMethod to analyze

        Returns:
        AST representation of method
        """


# High-level objects representing decompiled classes with source code access.
class DvClass:
    """High-level object representing a decompiled class with source code access."""

    def __init__(self, class_obj, vmx):
        """
        Initialize decompiled class wrapper.

        Parameters:
        - class_obj: ClassDefItem object
        - vmx: Analysis object
        """

    def get_source(self) -> str:
        """
        Get complete source code for class.

        Returns:
        Java-like source code including all methods and fields
        """

    def get_name(self) -> str:
        """Return class name."""

    def get_superclass_name(self) -> str:
        """Return superclass name."""

    def get_interfaces(self) -> list[str]:
        """Return list of implemented interface names."""

    def get_access_flags_string(self) -> str:
        """Return access flags as readable string."""

    def get_methods(self) -> list:
        """
        Get all decompiled methods in class.

        Returns:
        List of DvMethod objects
        """

    def get_fields(self) -> list:
        """
        Get all fields in class.

        Returns:
        List of field objects with source information
        """

    def get_method(self, name: str, descriptor: str = None):
        """
        Get specific decompiled method by name.

        Parameters:
        - name: Method name
        - descriptor: Method descriptor (optional)

        Returns:
        DvMethod object or None if not found
        """


# Individual decompiled methods with source code and metadata access.
class DvMethod:
    """Individual decompiled method with source code and metadata access."""

    def __init__(self, method_obj, vmx):
        """
        Initialize decompiled method wrapper.

        Parameters:
        - method_obj: EncodedMethod object
        - vmx: Analysis object
        """

    def get_source(self) -> str:
        """
        Get source code for method.

        Returns:
        Java-like source code for method implementation
        """

    def get_name(self) -> str:
        """Return method name."""

    def get_descriptor(self) -> str:
        """Return method descriptor."""

    def get_class_name(self) -> str:
        """Return containing class name."""

    def get_access_flags_string(self) -> str:
        """Return access flags as readable string."""

    def is_external(self) -> bool:
        """Return True if method is external."""

    def is_android_api(self) -> bool:
        """Return True if method is Android API."""

    def get_method_analysis(self):
        """
        Get MethodAnalysis object.

        Returns:
        MethodAnalysis with cross-references and control flow
        """

    def get_length(self) -> int:
        """Return method code length."""

    def show_source(self) -> None:
        """Print method source code to stdout."""


# Access to the decompiler's internal AST representation for advanced analysis.
# NOTE(review): these signatures take `self` but no class header is visible
# here; presumably they are DvMethod members -- confirm against the API.
def get_ast(self):
    """
    Get Abstract Syntax Tree representation.

    Returns:
    AST node representing method structure
    """


def get_params_type(self) -> list[str]:
    """
    Get parameter types.

    Returns:
    List of parameter type strings
    """


def get_information(self) -> dict:
    """
    Get detailed method information.

    Returns:
    Dictionary with method metadata
    """


def get_locals(self) -> list:
    """Get local variable information."""


def get_arguments(self) -> list:
    """Get method argument information."""


# Control decompiler behavior and output formatting.
class DecompilerOptions:
    """Controls decompiler behavior and output formatting."""

    def __init__(self):
        """Initialize decompiler configuration."""

    def set_pretty_show(self, enable: bool) -> None:
        """
        Enable/disable pretty formatting.

        Parameters:
        - enable: True to enable pretty printing
        """

    def set_colors(self, enable: bool) -> None:
        """
        Enable/disable syntax coloring.

        Parameters:
        - enable: True to enable colors
        """

    def set_show_exceptions(self, enable: bool) -> None:
        """
        Enable/disable exception information.

        Parameters:
        - enable: True to show exception details
        """

    def set_escape_unicode(self, enable: bool) -> None:
        """
        Enable/disable Unicode escaping.

        Parameters:
        - enable: True to escape Unicode characters
        """


# Fine-tune decompilation output and behavior.
# NOTE(review): these signatures take `self` but their enclosing class is not
# visible here -- confirm which class they belong to.
def set_decompiler_options(self, options: dict) -> None:
    """
    Set advanced decompiler options.

    Parameters:
    - options: Dictionary of option name to value mappings
    """


def get_decompiler_type(self) -> str:
    """Return decompiler type identifier."""


def process_folder(self, input_folder: str, output_folder: str) -> None:
    """
    Batch decompile entire folder.

    Parameters:
    - input_folder: Path to folder containing DEX/APK files
    - output_folder: Path to output decompiled source files
    """


# Different types of AST nodes for representing code structures.
class ASTNode:
    """Base AST node for representing code structures."""

    def get_type(self) -> str:
        """Return AST node type."""

    def get_children(self) -> list:
        """Return list of child nodes."""


class ExpressionNode(ASTNode):
    """AST node representing expressions."""

    def get_value(self) -> object:
        """Return expression value."""


class StatementNode(ASTNode):
    """AST node representing statements."""

    def is_compound(self) -> bool:
        """Return True if compound statement."""


# Core intermediate representation classes for decompiler analysis and transformation.
class IRForm:
    """Base class for all intermediate representation forms."""

    def get_type(self) -> str:
        """Return IR form type identifier."""

    def accept(self, visitor) -> None:
        """Accept visitor for traversal patterns."""


class Constant(IRForm):
    """Constant value representation in IR."""

    def __init__(self, value: object, const_type: str):
        """
        Initialize constant IR form.

        Parameters:
        - value: The constant value
        - const_type: Type of the constant
        """

    def get_value(self) -> object:
        """Return the constant value."""

    def get_const_type(self) -> str:
        """Return the constant type."""


class Variable(IRForm):
    """Variable reference representation in IR."""

    def __init__(self, name: str, var_type: str):
        """
        Initialize variable IR form.

        Parameters:
        - name: Variable name
        - var_type: Variable type
        """

    def get_name(self) -> str:
        """Return variable name."""

    def get_var_type(self) -> str:
        """Return variable type."""


class BinaryOperation(IRForm):
    """Binary operation representation in IR."""

    def __init__(self, operator: str, left: IRForm, right: IRForm):
        """
        Initialize binary operation IR form.

        Parameters:
        - operator: Operation operator
        - left: Left operand
        - right: Right operand
        """

    def get_operator(self) -> str:
        """Return operation operator."""

    def get_left_operand(self) -> IRForm:
        """Return left operand."""

    def get_right_operand(self) -> IRForm:
        """Return right operand."""


class AssignExpression(IRForm):
    """Assignment expression representation in IR."""

    def __init__(self, target: Variable, value: IRForm):
        """
        Initialize assignment expression.

        Parameters:
        - target: Assignment target variable
        - value: Value to assign
        """

    def get_target(self) -> Variable:
        """Return assignment target."""

    def get_value(self) -> IRForm:
        """Return assignment value."""


class InvokeInstruction(IRForm):
    """Method invocation representation in IR."""

    def __init__(self, method_name: str, args: list[IRForm], invoke_type: str):
        """
        Initialize method invocation.

        Parameters:
        - method_name: Name of invoked method
        - args: List of arguments
        - invoke_type: Type of invocation
        """

    def get_method_name(self) -> str:
        """Return method name."""

    def get_arguments(self) -> list[IRForm]:
        """Return argument list."""

    def get_invoke_type(self) -> str:
        """Return invocation type."""


class FieldAccess(IRForm):
    """Field access representation in IR."""

    def __init__(self, field_name: str, instance: IRForm = None):
        """
        Initialize field access.

        Parameters:
        - field_name: Name of accessed field
        - instance: Instance object (None for static fields)
        """

    def get_field_name(self) -> str:
        """Return field name."""

    def get_instance(self) -> IRForm:
        """Return instance object (None for static)."""


# Control flow and basic block analysis structures for advanced decompilation.
class BasicBlock:
    """Represents a basic block in control flow analysis."""

    def __init__(self, block_id: int):
        """
        Initialize basic block.

        Parameters:
        - block_id: Unique identifier for the block
        """

    def get_id(self) -> int:
        """Return block identifier."""

    def get_instructions(self) -> list[IRForm]:
        """Return list of IR instructions in this block."""

    def get_predecessors(self) -> list:
        """Return list of predecessor blocks."""

    def get_successors(self) -> list:
        """Return list of successor blocks."""

    def add_instruction(self, instr: IRForm) -> None:
        """Add instruction to this block."""


class StatementBlock(BasicBlock):
    """Basic block containing statement instructions."""

    def get_statements(self) -> list[IRForm]:
        """Return list of statement IR forms."""


class ConditionalBlock(BasicBlock):
    """Basic block with conditional branching."""

    def get_condition(self) -> IRForm:
        """Return conditional expression."""

    def get_true_block(self) -> BasicBlock:
        """Return block for true branch."""

    def get_false_block(self) -> BasicBlock:
        """Return block for false branch."""


class LoopBlock(BasicBlock):
    """Basic block representing loop structures."""

    def get_loop_condition(self) -> IRForm:
        """Return loop condition expression."""

    def get_loop_body(self) -> list[BasicBlock]:
        """Return blocks in loop body."""

    def is_while_loop(self) -> bool:
        """Return True if while loop."""

    def is_for_loop(self) -> bool:
        """Return True if for loop."""


class TryBlock(BasicBlock):
    """Basic block for exception handling structures."""

    def get_try_body(self) -> list[BasicBlock]:
        """Return blocks in try body."""

    def get_catch_blocks(self) -> list[BasicBlock]:
        """Return catch handler blocks."""

    def get_finally_block(self) -> BasicBlock:
        """Return finally block (if any)."""

    def get_exception_types(self) -> list[str]:
        """Return list of handled exception types."""


class ReturnBlock(BasicBlock):
    """Basic block containing return statements."""

    def get_return_value(self) -> IRForm:
        """Return the returned value expression."""

    def has_return_value(self) -> bool:
        """Return True if returns a value."""


from androguard.decompiler.instruction import IRForm, Constant, Variable, BinaryOperation
from androguard.decompiler.basic_blocks import BasicBlock, StatementBlock
# Create IR forms programmatically
const_5 = Constant(5, "int")
var_x = Variable("x", "int")
add_op = BinaryOperation("+", var_x, const_5)
print(f"IR Expression: {var_x.get_name()} {add_op.get_operator()} {const_5.get_value()}")

# Create basic blocks
block = StatementBlock(1)
block.add_instruction(add_op)
print(f"Block {block.get_id()} has {len(block.get_instructions())} instructions")

from androguard.decompiler.basic_blocks import ConditionalBlock, LoopBlock
# Analyze conditional structures
cond_block = ConditionalBlock(2)
condition = cond_block.get_condition()
if condition:
    # The branch targets are only looked up (and reported) when a condition exists.
    true_path = cond_block.get_true_block()
    false_path = cond_block.get_false_block()
    print(f"Conditional block branches to {true_path.get_id()} or {false_path.get_id()}")

# Analyze loop structures
loop_block = LoopBlock(3)
if loop_block.is_while_loop():
    condition = loop_block.get_loop_condition()
    body_blocks = loop_block.get_loop_body()
    print(f"While loop with {len(body_blocks)} body blocks")

from androguard.decompiler.basic_blocks import TryBlock
# Inspect the exception types handled by a try block
try_block = TryBlock(4)
exception_types = try_block.get_exception_types()
print(f"Try block handles {len(exception_types)} exception types:")
for exc_type in exception_types:
    print(f" - {exc_type}")

# Get catch handlers
catch_blocks = try_block.get_catch_blocks()
for i, catch_block in enumerate(catch_blocks):
    print(f"Catch block {i+1}: {catch_block.get_id()}")

# Check for finally block
finally_block = try_block.get_finally_block()
if finally_block:
    print(f"Finally block: {finally_block.get_id()}")
# NOTE(review): takes `self` but no class header is visible here; presumably
# an ASTNode member -- confirm against the API.
def accept(self, visitor) -> None:
    """Accept visitor for AST traversal."""
class MethodNode(ASTNode):
    """AST node for a method."""

    def get_body(self):
        """Return method body node."""

    def get_parameters(self) -> list:
        """Return parameter nodes."""

    def get_return_type(self) -> str:
        """Return return type string."""


class ClassNode(ASTNode):
    """AST node for a class."""

    def get_methods(self) -> list:
        """Return method nodes."""

    def get_fields(self) -> list:
        """Return field nodes."""

    def get_superclass(self) -> str:
        """Return superclass name."""


from androguard.misc import AnalyzeAPK
# Analyze APK with decompilation
apk, dex_objects, dx = AnalyzeAPK("app.apk")

# Get all decompiled classes
print(f"Decompiling {len(dex_objects)} DEX files...")
for dex in dex_objects:
    classes = dex.get_classes()
    print(f"Classes in DEX: {len(classes)}")

    # Decompile each class
    for class_obj in classes[:5]:  # First 5 classes
        class_name = class_obj.get_name()
        print(f"\nDecompiling class: {class_name}")

        # Create DvClass object
        dv_class = DvClass(class_obj, dx)

        # Get source code
        source_code = dv_class.get_source()
        print(f"Source length: {len(source_code)} characters")

        # Save to file (explicit UTF-8 so the output does not depend on the
        # platform's default encoding)
        filename = class_name.replace('/', '_').replace(';', '') + '.java'
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(source_code)

# Find and decompile specific methods
# NOTE(review): upstream androguard's Analysis.find_methods names this keyword
# "methodname" (not "method_name") -- confirm against the installed version.
oncreate_methods = dx.find_methods(methodname="onCreate")
for method_analysis in oncreate_methods:
    method_obj = method_analysis.get_method()
    print(f"\nDecompiling: {method_analysis.get_class_name()}.{method_analysis.get_name()}")

    # Create DvMethod object
    dv_method = DvMethod(method_obj, dx)

    # Get decompiled source
    source = dv_method.get_source()
    print("Decompiled source:")
    print(source)

    # Get method information
    info = dv_method.get_information()
    print(f"Method info: {info}")

# Find MainActivity and decompile completely
main_activities = dx.find_classes(r".*MainActivity.*")
for class_analysis in main_activities:
    class_obj = class_analysis.get_class()
    print(f"Decompiling MainActivity: {class_analysis.get_name()}")

    # Create decompiled class
    dv_class = DvClass(class_obj, dx)

    # Get class metadata
    print(f"Superclass: {dv_class.get_superclass_name()}")
    print(f"Interfaces: {dv_class.get_interfaces()}")
    print(f"Access flags: {dv_class.get_access_flags_string()}")

    # Decompile all methods
    dv_methods = dv_class.get_methods()
    print(f"Methods to decompile: {len(dv_methods)}")
    for dv_method in dv_methods:
        method_name = dv_method.get_name()
        print(f"\n--- Method: {method_name} ---")
        # Best effort: a method that fails to decompile is reported and skipped
        # so the remaining methods are still printed.
        try:
            source = dv_method.get_source()
            print(source)
        except Exception as e:
            print(f"Decompilation failed: {e}")

# NOTE(review): upstream androguard appears to define DecompilerDAD in
# androguard.decompiler.decompiler -- confirm this import path.
from androguard.decompiler.dad.decompile import DecompilerDAD
# Create DAD decompiler
decompiler = DecompilerDAD(dex_objects[0], dx)

# Find interesting methods
# NOTE(review): upstream androguard's Analysis.find_methods names this keyword
# "methodname" (not "method_name") -- confirm against the installed version.
crypto_methods = dx.find_methods(methodname=r".*(encrypt|decrypt|hash).*")
for method_analysis in crypto_methods:
    method_obj = method_analysis.get_method()
    print(f"\nAnalyzing AST for: {method_analysis.get_name()}")
    try:
        # Get AST representation
        ast = decompiler.get_ast(method_obj)
        if ast:
            print("AST structure available")
            # Custom AST analysis would go here

        # Get source with decompiler directly
        source = decompiler.get_source_method(method_obj)
        print("Direct decompilation successful")
        print(source[:200] + "..." if len(source) > 200 else source)
    except Exception as e:
        print(f"AST analysis failed: {e}")

import os
def safe_decompile_class(class_obj, dx, output_dir):
    """
    Safely decompile a class with error handling.

    Parameters:
    - class_obj: Class object to decompile (passed to DvClass)
    - dx: Analysis object (passed to DvClass)
    - output_dir: Directory that receives the generated .java file

    Returns:
    Tuple of (filename, None) on success, or (None, error message) on failure
    """
    try:
        dv_class = DvClass(class_obj, dx)
        class_name = dv_class.get_name()

        # Clean filename: strip only the descriptor wrapper ("L...;").
        # A blanket replace('L', '') would also delete every other "L" in the
        # name (e.g. "LoginActivity" -> "oginActivity").
        safe_name = class_name
        if safe_name.startswith('L') and safe_name.endswith(';'):
            safe_name = safe_name[1:-1]
        safe_name = safe_name.replace(';', '').replace('/', '_')
        filename = os.path.join(output_dir, safe_name + '.java')

        # Get source
        source = dv_class.get_source()

        # Write to file. dirname() is '' when output_dir is empty, and
        # os.makedirs('') raises, so fall back to the current directory.
        os.makedirs(os.path.dirname(filename) or os.curdir, exist_ok=True)
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(f"// Decompiled class: {class_name}\n")
            f.write(f"// Superclass: {dv_class.get_superclass_name()}\n")
            f.write(f"// Interfaces: {', '.join(dv_class.get_interfaces())}\n\n")
            f.write(source)
        return filename, None
    except Exception as e:
        # Deliberate best effort: the caller counts failures instead of aborting
        # the whole batch on one bad class.
        return None, str(e)
# Batch decompile all classes
output_directory = "decompiled_sources"
os.makedirs(output_directory, exist_ok=True)

total_classes = 0
successful = 0
failed = 0
for dex in dex_objects:
    classes = dex.get_classes()
    total_classes += len(classes)
    for class_obj in classes:
        filename, error = safe_decompile_class(class_obj, dx, output_directory)
        if filename:
            successful += 1
            print(f"✓ {os.path.basename(filename)}")
        else:
            failed += 1
            class_name = class_obj.get_name()
            print(f"✗ {class_name}: {error}")

print("\nDecompilation complete:")
print(f"Total classes: {total_classes}")
print(f"Successful: {successful}")
print(f"Failed: {failed}")
# Guard the ratio: with no classes at all the division would raise ZeroDivisionError.
if total_classes:
    print(f"Success rate: {successful/total_classes*100:.1f}%")
else:
    print("Success rate: n/a (no classes found)")

# Compare different decompilation approaches
def compare_decompilation_methods(method_obj, dx):
    """
    Compare different ways to decompile a method.

    Parameters:
    - method_obj: Method object to decompile
    - dx: Analysis object

    Returns:
    Source from the DAD decompiler if it is non-empty, otherwise the source
    from the DvMethod wrapper (None when both attempts failed)
    """
    method_name = f"{method_obj.get_class_name()}.{method_obj.get_name()}"
    print(f"\nComparing decompilation for: {method_name}")

    # Method 1: Direct DAD decompiler
    # (uses the module-level dex_objects list, first DEX only)
    try:
        decompiler = DecompilerDAD(dex_objects[0], dx)
        source1 = decompiler.get_source_method(method_obj)
        print("✓ DAD decompiler successful")
    except Exception as e:
        source1 = None
        print(f"✗ DAD decompiler failed: {e}")

    # Method 2: DvMethod wrapper
    try:
        dv_method = DvMethod(method_obj, dx)
        source2 = dv_method.get_source()
        print("✓ DvMethod wrapper successful")
    except Exception as e:
        source2 = None
        print(f"✗ DvMethod wrapper failed: {e}")

    # Compare results (only when both approaches produced non-empty output)
    if source1 and source2:
        if source1 == source2:
            print("✓ Both methods produce identical results")
        else:
            print("⚠ Methods produce different results")
            print(f" DAD length: {len(source1)}")
            print(f" DvMethod length: {len(source2)}")
    return source1 or source2
from itertools import islice

# Test on various method types
# islice() takes the first n results from any iterable; plain [:n] slicing fails
# if find_methods returns a generator.
# NOTE(review): upstream androguard's Analysis.find_methods is a generator and
# names its keyword "methodname" -- confirm against the installed version.
test_methods = []
test_methods.extend(islice(dx.find_methods(methodname="<init>"), 3))  # Constructors
test_methods.extend(islice(dx.find_methods(methodname="onCreate"), 2))  # Lifecycle
test_methods.extend(islice(dx.find_methods(accessflags=r".*static.*"), 2))  # Static methods
for method_analysis in test_methods:
    method_obj = method_analysis.get_method()
    source = compare_decompilation_methods(method_obj, dx)
    if source:
        print(f"Sample output ({len(source)} chars):")
        print(source[:150] + "..." if len(source) > 150 else source)


def auto_vm(filename: str):
    """
    Automatically determine file type and create appropriate VM.

    Parameters:
    - filename: Path to DEX/APK/ODEX file

    Returns:
    Tuple of (DEX_object, Analysis_object)
    """
def pretty_show(vmx, method, colors: bool = True) -> str:
    """
    Pretty print decompiled method with optional colors.

    Parameters:
    - vmx: Analysis object
    - method: Method to decompile
    - colors: Enable syntax highlighting

    Returns:
    Formatted source code string
    """
def export_source_to_disk(output_dir: str, vmx, java: bool = True, raw: bool = False) -> None:
    """
    Export all decompiled source to disk.

    Parameters:
    - output_dir: Output directory path
    - vmx: Analysis object
    - java: Export as .java files
    - raw: Export raw bytecode alongside source
    """


# Install with Tessl CLI
npx tessl i tessl/pypi-androguard