0
# Data Access and Modification
1
2
Methods for reading and writing data within PE files, including address translation between file offsets and relative virtual addresses (RVAs). These methods enable direct manipulation of PE file contents.
3
4
## Capabilities
5
6
### Address Translation
7
8
Convert between file offsets and relative virtual addresses (RVAs) for accurate data access.
9
10
```python { .api }
11
def get_rva_from_offset(self, offset):
12
"""
13
Convert file offset to relative virtual address (RVA).
14
15
Args:
16
offset (int): File offset
17
18
Returns:
19
int: RVA corresponding to the offset, or None if invalid
20
"""
21
22
def get_offset_from_rva(self, rva):
23
"""
24
Convert relative virtual address (RVA) to file offset.
25
26
Args:
27
rva (int): Relative virtual address
28
29
Returns:
30
int: File offset corresponding to the RVA, or None if invalid
31
"""
32
33
def get_physical_by_rva(self, rva):
34
"""
35
Get physical address from RVA.
36
37
Args:
38
rva (int): Relative virtual address
39
40
Returns:
41
int: Physical address
42
"""
43
```
44
45
### Data Reading
46
47
Read raw data and strings at specific RVAs within a PE file, or from a caller-supplied data buffer.
48
49
```python { .api }
50
def get_data(self, rva=0, length=None):
51
"""
52
Get data at RVA regardless of section boundaries.
53
54
Args:
55
rva (int): Relative virtual address to read from
56
length (int, optional): Number of bytes to read. If None, reads to end.
57
58
Returns:
59
bytes: Data at the specified RVA
60
"""
61
62
def get_string_at_rva(self, rva, max_length=1048576):
63
"""
64
Get null-terminated ASCII string at RVA.
65
66
Args:
67
rva (int): Relative virtual address
68
max_length (int): Maximum string length to prevent excessive memory use
69
70
Returns:
71
bytes: ASCII string (without null terminator)
72
"""
73
74
def get_string_u_at_rva(self, rva, max_length=65536, encoding=None):
75
"""
76
Get null-terminated Unicode string at RVA.
77
78
Args:
79
rva (int): Relative virtual address
80
max_length (int): Maximum string length in characters
81
encoding (str, optional): Text encoding to use
82
83
Returns:
84
str: Unicode string
85
"""
86
87
def get_bytes_from_data(self, offset, data):
88
"""
89
Get bytes from data buffer starting at offset.
90
91
Args:
92
offset (int): Offset into data buffer
93
data (bytes): Data buffer
94
95
Returns:
96
bytes: Extracted bytes
97
"""
98
99
def get_string_from_data(self, offset, data):
100
"""
101
Get null-terminated ASCII string from data buffer.
102
103
Args:
104
offset (int): Offset into data buffer
105
data (bytes): Data buffer
106
107
Returns:
108
bytes: ASCII string (without null terminator)
109
"""
110
```
111
112
### Integer Data Access
113
114
Read and write 16-, 32-, and 64-bit unsigned integer values at a given RVA or file offset.
115
116
```python { .api }
117
def get_dword_at_rva(self, rva):
118
"""
119
Get 32-bit unsigned integer at RVA.
120
121
Args:
122
rva (int): Relative virtual address
123
124
Returns:
125
int: 32-bit value at RVA
126
"""
127
128
def get_word_at_rva(self, rva):
129
"""
130
Get 16-bit unsigned integer at RVA.
131
132
Args:
133
rva (int): Relative virtual address
134
135
Returns:
136
int: 16-bit value at RVA
137
"""
138
139
def get_qword_at_rva(self, rva):
140
"""
141
Get 64-bit unsigned integer at RVA.
142
143
Args:
144
rva (int): Relative virtual address
145
146
Returns:
147
int: 64-bit value at RVA
148
"""
149
150
def set_dword_at_rva(self, rva, dword):
151
"""
152
Set 32-bit unsigned integer at RVA.
153
154
Args:
155
rva (int): Relative virtual address
156
dword (int): 32-bit value to write
157
"""
158
159
def set_word_at_rva(self, rva, word):
160
"""
161
Set 16-bit unsigned integer at RVA.
162
163
Args:
164
rva (int): Relative virtual address
165
word (int): 16-bit value to write
166
"""
167
168
def set_qword_at_rva(self, rva, qword):
169
"""
170
Set 64-bit unsigned integer at RVA.
171
172
Args:
173
rva (int): Relative virtual address
174
qword (int): 64-bit value to write
175
"""
176
177
def set_bytes_at_rva(self, rva, data):
178
"""
179
Set bytes at RVA.
180
181
Args:
182
rva (int): Relative virtual address
183
data (bytes): Data bytes to write
184
"""
185
186
def get_dword_from_offset(self, offset):
187
"""
188
Get 32-bit unsigned integer at file offset.
189
190
Args:
191
offset (int): File offset
192
193
Returns:
194
int: 32-bit value at offset
195
"""
196
197
def set_dword_at_offset(self, offset, dword):
198
"""
199
Set 32-bit unsigned integer at file offset.
200
201
Args:
202
offset (int): File offset
203
dword (int): 32-bit value to write
204
"""
205
206
def get_word_at_rva(self, rva):
207
"""
208
Get 16-bit unsigned integer at RVA.
209
210
Args:
211
rva (int): Relative virtual address
212
213
Returns:
214
int: 16-bit value at RVA
215
"""
216
217
def set_word_at_rva(self, rva, word):
218
"""
219
Set 16-bit unsigned integer at RVA.
220
221
Args:
222
rva (int): Relative virtual address
223
word (int): 16-bit value to write
224
"""
225
226
def get_word_from_offset(self, offset):
227
"""
228
Get 16-bit unsigned integer at file offset.
229
230
Args:
231
offset (int): File offset
232
233
Returns:
234
int: 16-bit value at offset
235
"""
236
237
def set_word_at_offset(self, offset, word):
238
"""
239
Set 16-bit unsigned integer at file offset.
240
241
Args:
242
offset (int): File offset
243
word (int): 16-bit value to write
244
"""
245
246
def get_qword_at_rva(self, rva):
247
"""
248
Get 64-bit unsigned integer at RVA.
249
250
Args:
251
rva (int): Relative virtual address
252
253
Returns:
254
int: 64-bit value at RVA
255
"""
256
257
def set_qword_at_rva(self, rva, qword):
258
"""
259
Set 64-bit unsigned integer at RVA.
260
261
Args:
262
rva (int): Relative virtual address
263
qword (int): 64-bit value to write
264
"""
265
266
def get_qword_from_offset(self, offset):
267
"""
268
Get 64-bit unsigned integer at file offset.
269
270
Args:
271
offset (int): File offset
272
273
Returns:
274
int: 64-bit value at offset
275
"""
276
277
def set_qword_at_offset(self, offset, qword):
278
"""
279
Set 64-bit unsigned integer at file offset.
280
281
Args:
282
offset (int): File offset
283
qword (int): 64-bit value to write
284
"""
285
```
286
287
### Byte Array Operations
288
289
Write arbitrary byte sequences at an RVA, at a file offset, or at an offset into the internal PE data buffer.
290
291
```python { .api }
292
def set_bytes_at_rva(self, rva, data):
293
"""
294
Set bytes at RVA.
295
296
Args:
297
rva (int): Relative virtual address
298
data (bytes): Data to write
299
"""
300
301
def set_bytes_at_offset(self, offset, data):
302
"""
303
Set bytes at file offset.
304
305
Args:
306
offset (int): File offset
307
data (bytes): Data to write
308
"""
309
310
def set_data_bytes(self, offset, data):
311
"""
312
Set bytes in internal PE data buffer.
313
314
Args:
315
offset (int): Offset into internal data
316
data (bytes): Data to write
317
"""
318
```
319
320
### Section Data Management
321
322
Manage modified section data and apply changes to the PE structure.
323
324
```python { .api }
325
def merge_modified_section_data(self):
326
"""
327
Update PE internal data with modified section data.
328
329
This method applies any changes made to section data back to the
330
main PE data structure, ensuring consistency between section objects
331
and the underlying file data.
332
"""
333
```
334
335
## Usage Examples
336
337
### Reading String Data
338
339
```python
340
import pefile
341
342
with pefile.PE('executable.exe') as pe:
343
# Read ASCII string at specific RVA
344
string_data = pe.get_string_at_rva(0x1000)
345
print(f"ASCII string: {string_data}")
346
347
# Read Unicode string
348
unicode_string = pe.get_string_u_at_rva(0x2000)
349
print(f"Unicode string: {unicode_string}")
350
351
# Read raw data
352
raw_data = pe.get_data(0x3000, 100) # Read 100 bytes
353
print(f"Raw data: {raw_data.hex()}")
354
```
355
356
### Address Translation
357
358
```python
359
import pefile
360
361
with pefile.PE('executable.exe') as pe:
362
# Convert between file offset and RVA
363
file_offset = 0x1000
364
rva = pe.get_rva_from_offset(file_offset)
365
print(f"File offset {hex(file_offset)} -> RVA {hex(rva)}")
366
367
# Convert back
368
back_to_offset = pe.get_offset_from_rva(rva)
369
print(f"RVA {hex(rva)} -> File offset {hex(back_to_offset)}")
370
```
371
372
### Modifying PE Data
373
374
```python
375
import pefile
376
377
# Load PE file
378
pe = pefile.PE('executable.exe')
379
380
# Modify a DWORD value at specific RVA
381
original_value = pe.get_dword_at_rva(0x1000)
382
print(f"Original value: {hex(original_value)}")
383
384
pe.set_dword_at_rva(0x1000, 0x12345678)
385
new_value = pe.get_dword_at_rva(0x1000)
386
print(f"New value: {hex(new_value)}")
387
388
# Write modified PE file
389
pe.write('modified_executable.exe')
390
391
pe.close()
392
```
393
394
### Working with Section Data
395
396
```python
397
import pefile
398
399
with pefile.PE('executable.exe') as pe:
400
# Access section by RVA
401
section = pe.get_section_by_rva(0x1000)
402
if section:
403
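# Section names are NUL-padded to 8 bytes; strip the padding before decoding
section_name = section.Name.rstrip(b'\x00').decode('utf-8')
print(f"Section name: {section_name}")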
404
405
# Modify section data
406
section_data = bytearray(section.get_data())
407
section_data[0:4] = b'TEST' # Replace first 4 bytes
408
409
# Update section with modified data
410
section.set_data(bytes(section_data))
411
412
# Apply changes to PE structure
413
pe.merge_modified_section_data()
414
```
415
416
### Overlay Data Access
417
418
```python
419
import pefile
420
421
with pefile.PE('executable.exe') as pe:
422
# Check if file has overlay data
423
overlay_start = pe.get_overlay_data_start_offset()
424
if overlay_start:
425
print(f"Overlay starts at offset: {hex(overlay_start)}")
426
427
# Get overlay data
428
overlay = pe.get_overlay()
429
print(f"Overlay size: {len(overlay)} bytes")
430
431
# Remove overlay and get clean PE
432
clean_pe_data = pe.trim()
433
with open('clean_executable.exe', 'wb') as f:
434
f.write(clean_pe_data)
435
```