# Input/Output Operations

File I/O operations for saving and loading arrays in various formats including NumPy's binary formats (.npy, .npz) and text formats, enabling data persistence and interoperability between CuPy and NumPy.

## Capabilities

### Binary File I/O

NumPy-compatible binary file operations for efficient array storage.

```python { .api }
def save(file, arr, allow_pickle=True, fix_imports=True):
    """
    Save array to binary file in NumPy .npy format.

    Parameters:
    - file: str or file-like, output file path or object
    - arr: array-like, array to save
    - allow_pickle: bool, allow pickling object arrays
    - fix_imports: bool, fix Python 2/3 pickle compatibility
    """

def load(file, mmap_mode=None, allow_pickle=False, fix_imports=True, encoding='ASCII'):
    """
    Load array from .npy file.

    Parameters:
    - file: str or file-like, input file path or object
    - mmap_mode: str, memory mapping mode (None, 'r+', 'r', 'w+', 'c')
    - allow_pickle: bool, allow loading pickled objects
    - fix_imports: bool, fix Python 2/3 pickle compatibility
    - encoding: str, encoding for Python 2 compatibility

    Returns:
    cupy.ndarray, loaded array
    """

def savez(file, *args, **kwds):
    """
    Save multiple arrays in uncompressed .npz format.

    Parameters:
    - file: str or file-like, output file path
    - args: arrays to save with auto-generated names
    - kwds: arrays to save with specified names
    """

def savez_compressed(file, *args, **kwds):
    """
    Save multiple arrays in compressed .npz format.

    Parameters:
    - file: str or file-like, output file path
    - args: arrays to save with auto-generated names
    - kwds: arrays to save with specified names
    """
```

### Text File I/O

Text-based file operations for human-readable array storage.

```python { .api }
def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='', footer='', comments='# ', encoding=None):
    """
    Save array to text file.

    Parameters:
    - fname: str or file-like, output file name or object
    - X: array-like, 1-D or 2-D array to save
    - fmt: str or sequence, format string for numbers
    - delimiter: str, column separator
    - newline: str, line separator
    - header: str, header text
    - footer: str, footer text
    - comments: str, comment prefix for header/footer
    - encoding: str, text encoding
    """

def loadtxt(fname, dtype=float, comments='#', delimiter=None, converters=None, skiprows=0, usecols=None, unpack=False, ndmin=0, encoding='bytes', max_rows=None):
    """
    Load data from text file.

    Parameters:
    - fname: str or file-like, input file name or object
    - dtype: data type, output array type
    - comments: str or sequence, comment prefixes
    - delimiter: str, column separator
    - converters: dict, column converters
    - skiprows: int, number of rows to skip
    - usecols: int or sequence, columns to read
    - unpack: bool, unpack columns into separate arrays
    - ndmin: int, minimum dimensions
    - encoding: str, text encoding
    - max_rows: int, maximum rows to read

    Returns:
    cupy.ndarray, loaded array
    """

def genfromtxt(fname, dtype=float, comments='#', delimiter=None, skip_header=0, skip_footer=0, converters=None, missing_values=None, filling_values=None, usecols=None, names=None, excludelist=None, deletechars=None, defaultfmt='f%i', autostrip=False, replace_space='_', case_sensitive=True, unpack=None, invalid_raise=True, max_rows=None, encoding='bytes'):
    """
    Load data from text file with missing values handling.

    Parameters:
    - fname: str or file-like, input file
    - dtype: data type, output type
    - comments: str, comment prefix
    - delimiter: str, column separator
    - skip_header: int, header lines to skip
    - skip_footer: int, footer lines to skip
    - converters: dict, column converters
    - missing_values: str or dict, missing value indicators
    - filling_values: scalar or dict, fill values for missing data
    - usecols: sequence, columns to use
    - names: bool or sequence, field names
    - excludelist: sequence, names to exclude
    - deletechars: str, characters to remove from names
    - defaultfmt: str, default format for names
    - autostrip: bool, automatically strip whitespace
    - replace_space: str, replacement for spaces in names
    - case_sensitive: bool, case sensitive field names
    - unpack: bool, unpack to separate arrays
    - invalid_raise: bool, raise on invalid values
    - max_rows: int, maximum rows to read
    - encoding: str, text encoding

    Returns:
    cupy.ndarray, loaded array with structured dtype if names specified
    """
```

### Array String Representation

Functions for converting arrays to string representations.

```python { .api }
def array_repr(arr, max_line_width=None, precision=None, suppress_small=None):
    """
    Return string representation of array.

    Parameters:
    - arr: ndarray, input array
    - max_line_width: int, maximum line width
    - precision: int, floating point precision
    - suppress_small: bool, suppress small values

    Returns:
    str, string representation
    """

def array_str(a, max_line_width=None, precision=None, suppress_small=None):
    """
    Return string representation of array data.

    Parameters:
    - a: ndarray, input array
    - max_line_width: int, maximum line width
    - precision: int, floating point precision
    - suppress_small: bool, suppress small values

    Returns:
    str, string representation of array data
    """

def array2string(a, max_line_width=None, precision=None, suppress_small=None, separator=' ', prefix="", style=repr, formatter=None, threshold=None, edgeitems=None, sign=None, floatmode=None, suffix="", **kwarg):
    """
    Return string representation of array with full control.

    Parameters:
    - a: ndarray, input array
    - max_line_width: int, maximum characters per line
    - precision: int, floating point precision
    - suppress_small: bool, suppress small values
    - separator: str, element separator
    - prefix: str, prefix string
    - style: callable, formatting function
    - formatter: dict, custom formatters
    - threshold: int, threshold for summarization
    - edgeitems: int, items at edges in summary
    - sign: str, sign handling ('-', '+', ' ')
    - floatmode: str, float format mode
    - suffix: str, suffix string

    Returns:
    str, formatted string representation
    """
```

## Usage Examples

### Basic File I/O Operations

```python
import cupy as cp
import numpy as np

# Create test data
data = cp.random.random((1000, 1000))
labels = cp.arange(1000)
# CuPy arrays do not support string dtypes, so keep text metadata on the host
metadata = np.array(['sample_' + str(i) for i in range(100)])

# Save single array to .npy file
cp.save('data.npy', data)

# Load array from .npy file
loaded_data = cp.load('data.npy')
print(f"Original shape: {data.shape}, Loaded shape: {loaded_data.shape}")

# Save multiple arrays to .npz file
cp.savez('dataset.npz',
         features=data,
         labels=labels,
         metadata=metadata)

# Save with compression
cp.savez_compressed('dataset_compressed.npz',
                    features=data,
                    labels=labels)

# Load from .npz file
npz_file = cp.load('dataset.npz')
loaded_features = npz_file['features']
loaded_labels = npz_file['labels']
npz_file.close()  # Good practice to close
```

### Text File Operations

```python
import cupy as cp

# Create sample data
measurements = cp.random.normal(100, 15, (50, 3))
timestamps = cp.arange(50)

# Save to text file with custom formatting
cp.savetxt('measurements.txt',
           measurements,
           fmt='%.2f',
           delimiter=',',
           header='Temperature,Humidity,Pressure',
           comments='')

# Save with more complex formatting
combined_data = cp.column_stack([timestamps, measurements])
cp.savetxt('timestamped_data.csv',
           combined_data,
           fmt=['%d', '%.2f', '%.2f', '%.2f'],
           delimiter=',',
           header='Timestamp,Temperature,Humidity,Pressure',
           comments='')

# Load text data
loaded_measurements = cp.loadtxt('measurements.txt', delimiter=',', skiprows=1)
print(f"Loaded data shape: {loaded_measurements.shape}")

# Load with column selection
temp_humidity = cp.loadtxt('measurements.txt',
                           delimiter=',',
                           skiprows=1,
                           usecols=(0, 1))

# Load and unpack columns
temp, humidity, pressure = cp.loadtxt('measurements.txt',
                                      delimiter=',',
                                      skiprows=1,
                                      unpack=True)
```

### Advanced Text Processing

```python
import cupy as cp

# Create data with missing values (simulate by saving with NaN)
data_with_missing = cp.random.random((20, 4))
data_with_missing[5:8, 1] = cp.nan
data_with_missing[12:15, 2] = cp.nan

# Save data
cp.savetxt('data_with_missing.txt', data_with_missing, fmt='%.6f')

# Load with missing value handling using genfromtxt
loaded_with_missing = cp.genfromtxt('data_with_missing.txt',
                                    missing_values='nan',
                                    filling_values=-999.0)

print(f"Missing values filled with -999: {cp.sum(loaded_with_missing == -999.0)}")

# Load structured data with field names
structured_data = cp.genfromtxt('timestamped_data.csv',
                                delimiter=',',
                                names=True,
                                dtype=None,
                                encoding='utf-8')
```

### Interoperability with NumPy

```python
import cupy as cp
import numpy as np

# Create CuPy array
gpu_data = cp.random.random((500, 500))

# Save CuPy array (automatically transfers to CPU)
cp.save('gpu_data.npy', gpu_data)

# Load into NumPy
numpy_data = np.load('gpu_data.npy')
print(f"NumPy loaded data type: {type(numpy_data)}")

# Load back into CuPy
cupy_data = cp.load('gpu_data.npy')
print(f"CuPy loaded data type: {type(cupy_data)}")

# Cross-platform compatibility
# Save from CuPy, load with NumPy
cp.savez('cross_platform.npz',
         array1=cp.ones((100, 100)),
         array2=cp.zeros((50, 50)))

# Load with NumPy
np_loaded = np.load('cross_platform.npz')
np_array1 = np_loaded['array1']
print(f"NumPy can load CuPy-saved data: {np_array1.shape}")

# Convert and save with NumPy, load with CuPy
np.save('numpy_saved.npy', np.random.random((200, 200)))
cp_loaded = cp.load('numpy_saved.npy')
print(f"CuPy can load NumPy-saved data: {cp_loaded.shape}")
```

### Memory Mapping for Large Files

```python
import cupy as cp
import numpy as np

# Create large dataset (using NumPy for memory mapping)
large_data = np.random.random((10000, 1000)).astype(np.float32)
np.save('large_dataset.npy', large_data)

# Memory map the file (read-only)
# Note: CuPy load doesn't support mmap_mode, so we use NumPy for mapping
mmapped_data = np.load('large_dataset.npy', mmap_mode='r')

# Process chunks with CuPy
chunk_size = 1000
for i in range(0, len(mmapped_data), chunk_size):
    chunk = mmapped_data[i:i+chunk_size]

    # Transfer chunk to GPU
    gpu_chunk = cp.asarray(chunk)

    # Process on GPU
    processed = cp.sqrt(gpu_chunk + 1.0)

    # Get result back if needed
    result = cp.asnumpy(processed)

    # Process or save result
    print(f"Processed chunk {i//chunk_size + 1}/{len(mmapped_data)//chunk_size}")
```

### Custom Array Formatting

```python
import cupy as cp

# Create test arrays
small_array = cp.array([[1.23456789, 2.34567890],
                        [3.45678901, 4.56789012]])

large_array = cp.random.random((100, 100))

# Control string representation
print("Default representation:")
print(cp.array_str(small_array))

print("\nCustom precision:")
print(cp.array_str(small_array, precision=2))

print("\nCustom representation:")
print(cp.array_repr(small_array, precision=3, suppress_small=True))

# Full control with array2string
custom_repr = cp.array2string(small_array,
                              precision=4,
                              separator=', ',
                              prefix='Array: ',
                              suffix=' [end]')
print(f"\nCustom format: {custom_repr}")

# Threshold for large arrays
# (array2string accepts threshold/edgeitems; array_str does not)
print("\nLarge array summary:")
print(cp.array2string(large_array, threshold=10, edgeitems=2))
```

### File Format Considerations

```python
import cupy as cp
import os

# Create test data of different types
float_data = cp.random.random((1000, 1000)).astype(cp.float32)
int_data = cp.random.randint(0, 100, (1000, 1000), dtype=cp.int32)
bool_data = cp.random.random((1000, 1000)) > 0.5

# Save in different formats and compare file sizes
formats = {
    'uncompressed_npz': lambda: cp.savez('test_uncompressed.npz',
                                         f=float_data, i=int_data, b=bool_data),
    'compressed_npz': lambda: cp.savez_compressed('test_compressed.npz',
                                                  f=float_data, i=int_data, b=bool_data),
    'individual_npy': lambda: [cp.save(f'test_{t}.npy', d)
                               for t, d in [('float', float_data),
                                            ('int', int_data),
                                            ('bool', bool_data)]]
}

for format_name, save_func in formats.items():
    save_func()

    if format_name == 'individual_npy':
        total_size = sum(os.path.getsize(f'test_{t}.npy')
                         for t in ['float', 'int', 'bool'])
        print(f"{format_name}: {total_size / 1024 / 1024:.2f} MB")
    else:
        filename = f"test_{format_name.split('_')[0]}.npz"
        size = os.path.getsize(filename)
        print(f"{format_name}: {size / 1024 / 1024:.2f} MB")
```