0
# File I/O and Data Analysis
1
2
Reading and analyzing captured memory profiling data. Includes file and socket readers for both offline analysis and live monitoring, with various methods to extract different views of allocation data.
3
4
```python
5
from typing import Optional, List, Tuple
6
```
7
8
## Capabilities
9
10
### FileReader
11
12
Context manager for reading and analyzing memray capture files. Provides multiple methods to extract different views of allocation data.
13
14
```python { .api }
15
class FileReader:
16
def __init__(self, file_name, *, report_progress=False, max_memory_records=10000):
17
"""
18
Initialize file reader for memray capture files.
19
20
Parameters:
21
- file_name: str or pathlib.Path, path to memray capture file
22
- report_progress: bool, whether to show progress during reading
23
- max_memory_records: int, maximum number of memory records to keep in memory
24
"""
25
26
def close(self):
27
"""Close the file reader."""
28
29
def get_allocation_records(self):
30
"""
31
Get iterator over all allocation records.
32
33
Returns:
34
- Iterator[AllocationRecord]: All allocation records in chronological order
35
"""
36
37
def get_high_watermark_allocation_records(self, merge_threads=True):
38
"""
39
Get allocation records at peak memory usage.
40
41
Parameters:
42
- merge_threads: bool, whether to merge records across threads
43
44
Returns:
45
- Iterator[AllocationRecord]: Records at peak memory usage
46
"""
47
48
def get_leaked_allocation_records(self, merge_threads=True):
49
"""
50
Get allocation records for memory leaks.
51
52
Parameters:
53
- merge_threads: bool, whether to merge records across threads
54
55
Returns:
56
- Iterator[AllocationRecord]: Records for leaked allocations
57
"""
58
59
def get_temporary_allocation_records(self, merge_threads=True, threshold=1):
60
"""
61
Get allocation records for short-lived allocations.
62
63
Parameters:
64
- merge_threads: bool, whether to merge records across threads
65
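- threshold: int, maximum number of other allocations that may occur between an allocation and its deallocation for it to still count as temporary (0 means it must be freed before any other allocation happens)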
66
67
Returns:
68
- Iterator[AllocationRecord]: Records for temporary allocations
69
"""
70
71
def get_temporal_allocation_records(self, merge_threads=True):
72
"""
73
Get temporal allocation records, which describe when allocations were made and freed over the course of the capture.
74
75
Parameters:
76
- merge_threads: bool, whether to merge records across threads
77
78
Returns:
79
- Iterator[TemporalAllocationRecord]: Time-ordered temporal allocation records
80
"""
81
82
def get_temporal_high_water_mark_allocation_records(self, merge_threads=True):
83
"""
84
Get temporal records at peak memory usage.
85
86
Parameters:
87
- merge_threads: bool, whether to merge records across threads
88
89
Returns:
90
- Tuple[List[TemporalAllocationRecord], List[int]]: Temporal allocation records, plus the high water mark (in bytes) for each memory snapshot
91
"""
92
93
def get_memory_snapshots(self):
94
"""
95
Get iterator over memory snapshots.
96
97
Returns:
98
- Iterator[MemorySnapshot]: Memory usage snapshots over time
99
"""
100
101
@property
102
def metadata(self) -> 'Metadata':
103
"""File metadata and statistics."""
104
105
@property
106
def closed(self) -> bool:
107
"""Whether the file reader is closed."""
108
```
109
110
Usage examples:
111
112
```python
113
import memray
114
115
# Basic file reading
116
with memray.FileReader("profile.bin") as reader:
117
print(f"Total allocations: {reader.metadata.total_allocations}")
118
print(f"Peak memory: {reader.metadata.peak_memory}")
119
120
# Iterate through all records
121
for record in reader.get_allocation_records():
122
print(f"Size: {record.size}, Thread: {record.thread_name}")
123
124
# Analyze memory leaks
125
with memray.FileReader("profile.bin") as reader:
126
leaked_records = list(reader.get_leaked_allocation_records())
127
print(f"Found {len(leaked_records)} leaked allocations")
128
129
for record in leaked_records:
130
stack_trace = record.stack_trace()
131
print(f"Leaked {record.size} bytes at:")
132
for frame in stack_trace:
133
print(f" {frame}")
134
135
# Analyze peak memory usage
136
with memray.FileReader("profile.bin") as reader:
137
peak_records = list(reader.get_high_watermark_allocation_records())
138
total_peak_size = sum(record.size for record in peak_records)
139
print(f"Peak memory usage: {total_peak_size} bytes")
140
141
# Monitor memory over time
142
with memray.FileReader("profile.bin") as reader:
143
snapshots = list(reader.get_memory_snapshots())
144
for snapshot in snapshots[-10:]: # Last 10 snapshots
145
print(f"Time: {snapshot.time}ms, RSS: {snapshot.rss}, Heap: {snapshot.heap}")
146
```
147
148
### SocketReader
149
150
Context manager for reading allocations from live socket connections, enabling real-time monitoring of running processes.
151
152
```python { .api }
153
class SocketReader:
154
def __init__(self, port: int):
155
"""
156
Initialize socket reader for live monitoring.
157
158
Parameters:
159
- port: int, port number to connect to
160
"""
161
162
def get_current_snapshot(self, *, merge_threads: bool):
163
"""
164
Get current allocation snapshot from live process.
165
166
Parameters:
167
- merge_threads: bool, whether to merge records across threads
168
169
Returns:
170
- List[AllocationRecord]: Current allocation records
171
"""
172
173
@property
174
def command_line(self) -> str:
175
"""Command line of the tracked process."""
176
177
@property
178
def is_active(self) -> bool:
179
"""Whether the reader connection is active."""
180
181
@property
182
def pid(self) -> Optional[int]:
183
"""Process ID of tracked process."""
184
185
@property
186
def has_native_traces(self) -> bool:
187
"""Whether native traces are enabled."""
188
```
189
190
Usage example:
191
192
```python
193
import memray
194
import time
195
196
# Connect to live profiling session
197
with memray.SocketReader(12345) as reader:
198
print(f"Monitoring: {reader.command_line}")
199
200
while True:
201
try:
202
snapshot = list(reader.get_current_snapshot(merge_threads=True))  # materialize: the records are summed and counted below
203
total_memory = sum(record.size for record in snapshot)
204
print(f"Current memory usage: {total_memory} bytes ({len(snapshot)} allocations)")
205
time.sleep(1)
206
except KeyboardInterrupt:
207
break
208
```
209
210
### Metadata Information
211
212
Comprehensive metadata about profiling sessions.
213
214
```python { .api }
215
class Metadata:
216
start_time: datetime
217
end_time: datetime
218
total_allocations: int
219
total_frames: int
220
peak_memory: int
221
command_line: str
222
pid: int
223
main_thread_id: int
224
python_allocator: str
225
has_native_traces: bool
226
trace_python_allocators: bool
227
file_format: FileFormat
228
```
229
230
Usage example:
231
232
```python
233
with memray.FileReader("profile.bin") as reader:
234
meta = reader.metadata
235
duration = meta.end_time - meta.start_time
236
print(f"Profiling session: {duration.total_seconds():.2f} seconds")
237
print(f"Process: {meta.command_line} (PID: {meta.pid})")
238
print(f"Allocations: {meta.total_allocations}")
239
print(f"Peak memory: {meta.peak_memory} bytes")
240
print(f"Native traces: {'Yes' if meta.has_native_traces else 'No'}")
241
```
242
243
### Statistics Analysis
244
245
Compute statistics and summaries from capture files.
246
247
```python { .api }
248
def compute_statistics(file_name, *, report_progress=False, num_largest=5):
249
"""
250
Compute comprehensive statistics from a memray capture file.
251
252
Parameters:
253
- file_name: str or pathlib.Path, path to memray capture file
254
- report_progress: bool, whether to show progress during computation
255
- num_largest: int, number of largest allocations to track
256
257
Returns:
258
- Stats: Statistics object with allocation summaries
259
"""
260
```
261
262
Usage example:
263
264
```python
265
import memray
266
267
# Compute statistics from a capture file
268
stats = memray.compute_statistics("profile.bin", num_largest=10)
269
print(f"Statistics computed for {stats.total_num_allocations} allocations")
270
```
271
272
### Utility Functions
273
274
Helper functions for formatting and debugging.
275
276
```python { .api }
277
def size_fmt(num_bytes):
278
"""
279
Format byte size in human-readable format.
280
281
Parameters:
282
- num_bytes: int, number of bytes
283
284
Returns:
285
- str: Formatted size string using binary (1024-based) units (e.g., "1.500MB")
286
"""
287
288
def get_symbolic_support():
289
"""
290
Get level of symbolic debugging support available.
291
292
Returns:
293
- SymbolicSupport: Enum indicating support level
294
"""
295
296
def greenlet_trace(frame, event, arg):
297
"""
298
Greenlet tracing function for async/coroutine tracking.
299
300
Parameters:
301
- frame: frame object
302
- event: str, tracing event type
303
- arg: event argument
304
305
Returns:
306
- greenlet_trace function (itself)
307
"""
308
```
309
310
Usage example:
311
312
```python
313
import memray
314
315
# Format sizes for display
316
size_str = memray.size_fmt(1048576) # "1.000MB"
317
318
# Check symbolic support
319
support = memray.get_symbolic_support()
320
if support == memray.SymbolicSupport.TOTAL:
321
print("Full symbolic debugging available")
322
```