0
# XML and Resources
1
2
This module processes binary Android XML (AXML) and Android resource table (ARSC) files, giving access to an application's resources, layouts, configuration data, and string resources. Android applications store their XML files in a compact binary format for efficiency, so these files must be decoded before they can be read as ordinary XML.
3
4
## Capabilities
5
6
### AXML Parser
7
8
Binary XML parser for converting Android's binary XML format back to standard XML.
9
10
```python { .api }
11
class AXMLPrinter:
12
def __init__(self, raw_buff: bytes):
13
"""
14
Initialize AXML parser.
15
16
Parameters:
17
- raw_buff: Raw AXML file bytes
18
"""
19
20
def get_xml(self, pretty: bool = True) -> bytes:
21
"""
22
Convert AXML to XML format.
23
24
Parameters:
25
- pretty: Format XML with indentation
26
27
Returns:
28
XML content as bytes
29
"""
30
31
def is_valid(self) -> bool:
32
"""Return True if AXML structure is valid."""
33
34
def get_xml_obj(self):
35
"""Return parsed XML as ElementTree object."""
36
37
def get_buff(self) -> bytes:
38
"""Return original AXML buffer."""
39
```
40
41
### AXML Parser Engine
42
43
Low-level AXML parsing functionality for detailed binary XML analysis.
44
45
```python { .api }
46
class AXMLParser:
47
def __init__(self, raw_buff: bytes):
48
"""
49
Initialize low-level AXML parser.
50
51
Parameters:
52
- raw_buff: Raw AXML file bytes
53
"""
54
55
def next(self) -> int:
56
"""
57
Advance to next XML event.
58
59
Returns:
60
Event type constant (START_DOCUMENT, START_TAG, etc.)
61
"""
62
63
def get_name(self) -> str:
64
"""Return name of current XML element."""
65
66
def get_text(self) -> str:
67
"""Return text content of current element."""
68
69
def get_attribute_count(self) -> int:
70
"""Return number of attributes for current element."""
71
72
def get_attribute_name(self, i: int) -> str:
73
"""
74
Get attribute name by index.
75
76
Parameters:
77
- i: Attribute index
78
79
Returns:
80
Attribute name string
81
"""
82
83
def get_attribute_value(self, i: int) -> str:
84
"""
85
Get attribute value by index.
86
87
Parameters:
88
- i: Attribute index
89
90
Returns:
91
Attribute value string
92
"""
93
94
def get_attribute_namespace(self, i: int) -> str:
95
"""Get attribute namespace by index."""
96
97
def get_attribute_resource_id(self, i: int) -> int:
98
"""Get attribute resource ID by index."""
99
```
100
101
### String Block Access
102
103
Access to AXML string pools and string resolution.
104
105
```python { .api }
106
class StringBlock:
107
def __init__(self, buff: bytes, header: object):
108
"""
109
Initialize string block parser.
110
111
Parameters:
112
- buff: String block bytes
113
- header: AXML header information
114
"""
115
116
def get_string(self, idx: int) -> str | None:
117
"""
118
Get string by index from string pool.
119
120
Parameters:
121
- idx: String index
122
123
Returns:
124
String value or None if invalid
125
"""
126
127
def get_strings_size(self) -> int:
128
"""Return total number of strings in pool."""
129
130
def show(self) -> None:
131
"""Display string block information."""
132
```
133
134
## Resource Parser (ARSC)
135
136
Android Resource (ARSC) file parser for accessing application resources and configurations.
137
138
```python { .api }
139
class ARSCParser:
140
def __init__(self, raw_buff: bytes):
141
"""
142
Initialize ARSC parser.
143
144
Parameters:
145
- raw_buff: Raw ARSC file bytes
146
"""
147
148
def get_packages_names(self) -> list[str]:
149
"""Return list of all package names in resources."""
150
151
def get_string_resources(self, package_name: str, locale: str = '\x00\x00') -> dict[str, str]:
152
"""
153
Get string resources for package and locale.
154
155
Parameters:
156
- package_name: Target package name
157
- locale: Locale code; defaults to '\x00\x00', which selects the default locale
158
159
Returns:
160
Dictionary mapping resource names to string values
161
"""
162
163
def get_id(self, package_name: str, ttype: int, name: str) -> int | None:
164
"""
165
Get resource ID for named resource.
166
167
Parameters:
168
- package_name: Package name
169
- ttype: Resource type ID
170
- name: Resource name
171
172
Returns:
173
Resource ID or None if not found
174
"""
175
176
def get_public_resources(self, package_name: str, ttype: int | None = None) -> dict:
177
"""
178
Get public resources for package.
179
180
Parameters:
181
- package_name: Package name
182
- ttype: Optional resource type filter
183
184
Returns:
185
Dictionary of public resource mappings
186
"""
187
188
def get_type_configs(self, package_name: str, type_name: str) -> list:
189
"""
190
Get configurations for resource type.
191
192
Parameters:
193
- package_name: Package name
194
- type_name: Resource type name
195
196
Returns:
197
List of configuration objects
198
"""
199
```
200
201
### Resource Resolution
202
203
Resolve resource references and values with configuration support.
204
205
```python { .api }
206
class ResourceResolver:
207
def __init__(self, android_resources: ARSCParser, config: object = None):
208
"""
209
Initialize resource resolver.
210
211
Parameters:
212
- android_resources: ARSCParser instance
213
- config: Optional configuration to use for resolution (defaults to None)
214
"""
215
216
def resolve(self, res_id: int) -> tuple:
217
"""
218
Resolve resource ID to value and configuration.
219
220
Parameters:
221
- res_id: Resource identifier
222
223
Returns:
224
Tuple of (resource_value, configuration)
225
"""
226
227
def get_string(self, res_id: int) -> str | None:
228
"""
229
Resolve resource ID to string value.
230
231
Parameters:
232
- res_id: String resource ID
233
234
Returns:
235
String value or None if not found
236
"""
237
238
def get_resolved_res_configs(self, res_id: int, config: object = None) -> list:
239
"""Get all resolved configurations for resource."""
240
```
241
242
## Resource Table Components
243
244
Low-level access to resource table structure and metadata.
245
246
```python { .api }
247
class ARSCResTablePackage:
248
def get_name(self) -> str:
249
"""Return package name."""
250
251
def get_id(self) -> int:
252
"""Return package ID."""
253
254
def get_type_strings(self) -> StringBlock:
255
"""Return type names string block."""
256
257
def get_key_strings(self) -> StringBlock:
258
"""Return key names string block."""
259
260
class ARSCResType:
261
def get_package_name(self) -> str:
262
"""Return containing package name."""
263
264
def get_type(self) -> str:
265
"""Return resource type name."""
266
267
def get_config(self) -> object:
268
"""Return configuration object."""
269
270
def get_entries(self) -> dict:
271
"""Return dictionary of entry ID to entry object mappings."""
272
273
class ARSCResTableEntry:
274
def get_index(self) -> int:
275
"""Return entry index."""
276
277
def get_key(self) -> str:
278
"""Return resource key name."""
279
280
def get_value(self):
281
"""Return resource value."""
282
283
def is_public(self) -> bool:
284
"""Return True if resource is public."""
285
286
def is_complex(self) -> bool:
287
"""Return True if entry is complex type."""
288
```
289
290
### Configuration Objects
291
292
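Configuration objects describe the device configuration a resource variant targets (language, country, screen density, orientation, screen size, keyboard) and can be matched against one another.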
293
294
```python { .api }
295
class ARSCResTableConfig:
296
def get_language(self) -> str:
297
"""Return language code."""
298
299
def get_country(self) -> str:
300
"""Return country code."""
301
302
def get_density(self) -> int:
303
"""Return screen density."""
304
305
def get_orientation(self) -> int:
306
"""Return screen orientation."""
307
308
def get_screen_size(self) -> int:
309
"""Return screen size category."""
310
311
def get_keyboard(self) -> int:
312
"""Return keyboard type."""
313
314
def get_qualifier(self) -> str:
315
"""Return full configuration qualifier string."""
316
317
def match(self, config) -> bool:
318
"""Check if this config matches another config."""
319
```
320
321
## Usage Examples
322
323
### Basic AXML Processing
324
325
```python
326
from androguard.core.axml import AXMLPrinter
327
328
# Read AXML file
329
with open("AndroidManifest.xml", "rb") as f:
330
axml_data = f.read()
331
332
# Parse AXML
333
axml = AXMLPrinter(axml_data)
334
335
if axml.is_valid():
336
# Convert to readable XML
337
xml_content = axml.get_xml(pretty=True)
338
print(xml_content.decode('utf-8'))
339
340
# Save as regular XML file
341
with open("AndroidManifest_readable.xml", "wb") as f:
342
f.write(xml_content)
343
else:
344
print("Invalid AXML file")
345
```
346
347
### Resource Extraction
348
349
```python
350
from androguard.core.axml import ARSCParser
351
352
# Load resources.arsc
353
with open("resources.arsc", "rb") as f:
354
arsc_data = f.read()
355
356
arsc = ARSCParser(arsc_data)
357
358
# Get all packages
359
packages = arsc.get_packages_names()
360
print(f"Packages: {packages}")
361
362
for package in packages:
363
print(f"\nPackage: {package}")
364
365
# Get string resources
366
strings = arsc.get_string_resources(package)
367
print(f"String resources: {len(strings)}")
368
369
for name, value in list(strings.items())[:10]: # Show first 10
370
print(f" {name}: {value}")
371
372
# Get public resources
373
public_resources = arsc.get_public_resources(package)
374
print(f"Public resources: {len(public_resources)}")
375
```
376
377
### Configuration-Specific Resources
378
379
```python
380
# Get resources for different configurations
381
package_name = "com.example.app"
382
383
# Get all type configurations
384
string_configs = arsc.get_type_configs(package_name, "string")
385
print(f"String configurations: {len(string_configs)}")
386
387
for config in string_configs:
388
print(f"Configuration: {config.get_qualifier()}")
389
print(f" Language: {config.get_language()}")
390
print(f" Country: {config.get_country()}")
391
print(f" Density: {config.get_density()}")
392
393
# Get strings for specific locale
394
french_strings = arsc.get_string_resources(package_name, "fr")
395
if french_strings:
396
print("French strings:")
397
for name, value in list(french_strings.items())[:5]:
398
print(f" {name}: {value}")
399
```
400
401
### Resource Resolution
402
403
```python
404
from androguard.core.axml import ARSCParser
405
406
# Create a resolver for `arsc`, the ARSCParser instance loaded in the Resource Extraction example
407
resolver = ARSCParser.ResourceResolver(arsc)
408
409
# Resolve specific resource IDs
410
resource_ids = [0x7f040001, 0x7f050002, 0x7f060003]
411
412
for res_id in resource_ids:
413
try:
414
value, config = resolver.resolve(res_id)
415
print(f"Resource 0x{res_id:08x}:")
416
print(f" Value: {value}")
417
print(f" Config: {config.get_qualifier() if config else 'default'}")
418
except Exception as e:
419
print(f"Failed to resolve 0x{res_id:08x}: {e}")
420
421
# Resolve string resources
422
app_name_id = arsc.get_id(package_name, 0x03, "app_name") # 0x03 is assumed to be this package's string type ID; type IDs are assigned per package, so verify it first
423
if app_name_id:
424
app_name = resolver.get_string(app_name_id)
425
print(f"App name: {app_name}")
426
```
427
428
### Advanced AXML Parsing
429
430
```python
431
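from androguard.core import axml  # the module provides AXMLParser and the event constants used below
432
433
# Low-level AXML parsing (reuses `axml_data` read in the Basic AXML Processing example)
434
parser = axml.AXMLParser(axml_data)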
435
436
# Parse events manually
437
while True:
438
event = parser.next()
439
440
if event == axml.START_DOCUMENT:
441
print("Document started")
442
elif event == axml.START_TAG:
443
name = parser.get_name()
444
print(f"Start tag: {name}")
445
446
# Process attributes
447
attr_count = parser.get_attribute_count()
448
for i in range(attr_count):
449
attr_name = parser.get_attribute_name(i)
450
attr_value = parser.get_attribute_value(i)
451
attr_ns = parser.get_attribute_namespace(i)
452
453
if attr_ns:
454
print(f" {attr_ns}:{attr_name}={attr_value}")
455
else:
456
print(f" {attr_name}={attr_value}")
457
458
elif event == axml.END_TAG:
459
name = parser.get_name()
460
print(f"End tag: {name}")
461
462
elif event == axml.TEXT:
463
text = parser.get_text()
464
if text.strip():
465
print(f"Text: {text}")
466
467
elif event == axml.END_DOCUMENT:
468
print("Document ended")
469
break
470
```
471
472
### Resource Type Analysis
473
474
```python
475
# Analyze different resource types
476
for package in packages:
477
print(f"\nAnalyzing package: {package}")
478
479
# Get all resource types
480
public_resources = arsc.get_public_resources(package)
481
482
# Group by type
483
by_type = {}
484
for res_id, (res_type, res_name) in public_resources.items():
485
if res_type not in by_type:
486
by_type[res_type] = []
487
by_type[res_type].append((res_id, res_name))
488
489
# Display statistics
490
for res_type, resources in by_type.items():
491
print(f" {res_type}: {len(resources)} resources")
492
493
# Show a few examples
494
for res_id, res_name in resources[:3]:
495
print(f" 0x{res_id:08x}: {res_name}")
496
```
497
498
## Utility Functions
499
500
```python { .api }
501
def format_value(value_type: int, value_data: int, string_block: StringBlock = None) -> str:
502
"""
503
Format resource value based on its type.
504
505
Parameters:
506
- value_type: Resource value type constant
507
- value_data: Raw value data
508
- string_block: String block for string resolution
509
510
Returns:
511
Formatted value string
512
"""
513
514
def complexToFloat(xcomplex: int) -> float:
515
"""
516
Convert complex unit value to float.
517
518
Parameters:
519
- xcomplex: Complex unit value
520
521
Returns:
522
Float representation
523
"""
524
525
def get_arsc_info(arsc_file: str) -> dict:
526
"""
527
Get formatted ARSC file information.
528
529
Parameters:
530
- arsc_file: Path to ARSC file
531
532
Returns:
533
Dictionary with ARSC analysis results
534
"""
535
```