0
# Input/Output Operations
1
2
CuPy provides comprehensive input/output operations for reading, writing, and formatting array data across various file formats and data sources. These operations enable efficient data exchange between GPU arrays and external storage systems, supporting both binary and text formats with optimized performance for large datasets.
3
4
## Capabilities
5
6
### File I/O Operations
7
8
Core file input and output operations for saving and loading CuPy arrays in various formats.
9
10
```python { .api }
11
def save(file, arr):
12
"""
13
Save an array to a binary file in NumPy .npy format.
14
15
Parameters:
16
file: str or file - File name or file object to save to
17
arr: array_like - Array to save
18
"""
19
20
def load(file, mmap_mode=None, allow_pickle=False, fix_imports=True, encoding='ASCII'):
21
"""
22
Load arrays or pickled objects from .npy, .npz or pickled files.
23
24
Parameters:
25
file: str or file - File name or file object to load from
26
mmap_mode: None or str, optional - Memory mapping mode
27
allow_pickle: bool, optional - Allow loading pickled object arrays (default False for security; required for object arrays)
28
fix_imports: bool, optional - Fix Python 2/3 import compatibility
29
encoding: str, optional - Encoding used for reading Python 2 strings
30
31
Returns:
32
ndarray or dict: Loaded array data
33
"""
34
35
def savez(file, *args, **kwds):
36
"""
37
Save several arrays into a single file in uncompressed .npz format.
38
39
Parameters:
40
file: str or file - Output file name or file object
41
*args: array_like - Arrays to save (saved with automatic names arr_0, arr_1, etc.)
42
**kwds: array_like - Arrays to save with specified names
43
"""
44
45
def savez_compressed(file, *args, **kwds):
46
"""
47
Save several arrays into a single file in compressed .npz format.
48
49
Parameters:
50
file: str or file - Output file name or file object
51
*args: array_like - Arrays to save with automatic names
52
**kwds: array_like - Arrays to save with specified names
53
"""
54
55
def loadtxt(fname, dtype=float, comments='#', delimiter=None, converters=None, skiprows=0, usecols=None, unpack=False, ndmin=0, encoding='bytes', max_rows=None):
56
"""
57
Load data from a text file.
58
59
Parameters:
60
fname: str or file - File name or file object to read from
61
dtype: data-type, optional - Data type of the resulting array
62
comments: str or sequence, optional - Characters used to indicate comments
63
delimiter: str, optional - String used to separate values
64
converters: dict, optional - Dictionary mapping column number to conversion function
65
skiprows: int, optional - Skip the first skiprows lines including comments
66
usecols: int or sequence, optional - Which columns to read
67
unpack: bool, optional - If True, return data in separate arrays
68
ndmin: int, optional - Minimum number of dimensions for returned array
69
encoding: str, optional - Encoding used to decode the input file
70
max_rows: int, optional - Read max_rows lines of content after skiprows
71
72
Returns:
73
ndarray: Data read from the text file
74
"""
75
76
def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='', footer='', comments='# ', encoding=None):
77
"""
78
Save an array to a text file.
79
80
Parameters:
81
fname: str or file - File name or file object to write to
82
X: 1-D or 2-D array_like - Data to be saved
83
fmt: str or sequence of str, optional - Format specification
84
delimiter: str, optional - String separating columns
85
newline: str, optional - String separating lines
86
header: str, optional - Header text at the beginning of the file
87
footer: str, optional - Footer text at the end of the file
88
comments: str, optional - String prefix for header and footer comments
89
encoding: str, optional - Encoding used for writing text files
90
"""
91
92
def fromfile(file, dtype=float, count=-1, sep='', offset=0):
93
"""
94
Construct an array from data in a text or binary file.
95
96
Parameters:
97
file: str or file - Open file object or filename
98
dtype: data-type, optional - Data type of the returned array
99
count: int, optional - Number of items to read (-1 means all data)
100
sep: str, optional - Separator between items for text files
101
offset: int, optional - Offset in bytes from the file's current position
102
103
Returns:
104
ndarray: Array constructed from file data
105
"""
106
107
def tofile(arr, fid, sep="", format="%s"):
108
"""
109
Write array to a file as text or binary (default).
110
111
Parameters:
112
arr: ndarray - Array to write to file
113
fid: str or file - Output file name or open file object
114
sep: str, optional - Separator between array items for text output
115
format: str, optional - Format string for text output
116
"""
117
```
118
119
### String and Buffer Operations
120
121
Operations for converting arrays to and from string and buffer representations.
122
123
```python { .api }
124
def fromstring(string, dtype=float, count=-1, sep=''):
125
"""
126
Create an array from string data.
127
128
Parameters:
129
string: str - String containing array data
130
dtype: data-type, optional - Data type of the returned array
131
count: int, optional - Number of items to read from string
132
sep: str, optional - String separator between items
133
134
Returns:
135
ndarray: Array created from string data
136
"""
137
138
def tostring(arr, order='C'):
139
"""
140
Return array data as a string containing the raw bytes.
141
142
Parameters:
143
arr: ndarray - Input array
144
order: {'C', 'F', 'A'}, optional - Order of data bytes
145
146
Returns:
147
bytes: Raw bytes of array data
148
"""
149
150
def frombuffer(buffer, dtype=float, count=-1, offset=0):
151
"""
152
Interpret a buffer as a 1-dimensional array.
153
154
Parameters:
155
buffer: buffer_like - Object exposing buffer interface
156
dtype: data-type, optional - Data type of returned array
157
count: int, optional - Number of items to read from buffer
158
offset: int, optional - Start reading buffer from this offset
159
160
Returns:
161
ndarray: 1-D array from buffer data
162
"""
163
164
def tobytes(arr, order='C'):
165
"""
166
Return array data as bytes.
167
168
Parameters:
169
arr: ndarray - Input array
170
order: {'C', 'F', 'A'}, optional - Order of data bytes
171
172
Returns:
173
bytes: Array data as bytes
174
"""
175
```
176
177
### Formatted Text Output
178
179
Functions for creating formatted string representations of arrays for display and debugging.
180
181
```python { .api }
182
def array_str(a, max_line_width=None, precision=None, suppress_small=None):
183
"""
184
Return a string representation of an array.
185
186
Parameters:
187
a: ndarray - Input array
188
max_line_width: int, optional - Maximum characters per line
189
precision: int, optional - Floating point precision
190
suppress_small: bool, optional - Suppress small floating point values
191
192
Returns:
193
str: String representation of the array
194
"""
195
196
def array_repr(arr, max_line_width=None, precision=None, suppress_small=None):
197
"""
198
Return string representation of an array that can recreate the array.
199
200
Parameters:
201
arr: ndarray - Input array
202
max_line_width: int, optional - Maximum characters per line
203
precision: int, optional - Floating point precision
204
suppress_small: bool, optional - Suppress small floating point values
205
206
Returns:
207
str: String representation with constructor format
208
"""
209
210
def array2string(a, max_line_width=None, precision=None, suppress_small=None, separator=' ', prefix="", style=repr, formatter=None, threshold=None, edgeitems=None, sign=None, floatmode=None, suffix="", legacy=None):
211
"""
212
Return a string representation of an array with full control over formatting.
213
214
Parameters:
215
a: ndarray - Input array
216
max_line_width: int, optional - Maximum characters per line
217
precision: int, optional - Floating point precision
218
suppress_small: bool, optional - Suppress small floating point values
219
separator: str, optional - Separator between array elements
220
prefix: str, optional - Prefix string for each line except first
221
style: function, optional - Function to format scalar values
222
formatter: dict, optional - Dictionary of formatting functions by type
223
threshold: int, optional - Total array elements triggering summarization
224
edgeitems: int, optional - Number of items in summary at beginning/end
225
sign: str, optional - Controls sign printing ('-', '+', ' ')
226
floatmode: str, optional - Controls floating-point precision display
227
suffix: str, optional - Suffix string for each line except last
228
legacy: str or False, optional - Legacy printing mode (e.g. '1.13')
229
230
Returns:
231
str: Formatted string representation
232
"""
233
234
def format_float_positional(x, precision=None, unique=True, fractional=True, trim='k', sign=False, pad_left=None, pad_right=None):
235
"""
236
Format a floating-point scalar as a decimal string in positional notation.
237
238
Parameters:
239
x: float - Value to format
240
precision: int, optional - Maximum number of digits to print
241
unique: bool, optional - Use unique formatting that preserves value
242
fractional: bool, optional - Use fractional formatting
243
trim: str, optional - Trimming method ('k', '0', '.')
244
sign: bool, optional - Force sign display
245
pad_left: int, optional - Pad to this many characters on left
246
pad_right: int, optional - Pad to this many characters on right
247
248
Returns:
249
str: Formatted float string
250
"""
251
252
def format_float_scientific(x, precision=None, unique=True, trim='k', sign=False, pad_left=None, exp_digits=None):
253
"""
254
Format a floating-point scalar as a decimal string in scientific notation.
255
256
Parameters:
257
x: float - Value to format
258
precision: int, optional - Maximum number of digits to print
259
unique: bool, optional - Use unique formatting that preserves value
260
trim: str, optional - Trimming method ('k', '0', '.')
261
sign: bool, optional - Force sign display
262
pad_left: int, optional - Pad to this many characters on left
263
exp_digits: int, optional - Number of digits in exponent
264
265
Returns:
266
str: Formatted float string in scientific notation
267
"""
268
```
269
270
### Print Options and Configuration
271
272
Functions for configuring array printing and display options.
273
274
```python { .api }
275
def set_printoptions(precision=None, threshold=None, edgeitems=None, linewidth=None, suppress=None, nanstr=None, infstr=None, formatter=None, sign=None, floatmode=None, legacy=None):
276
"""
277
Set printing options for arrays.
278
279
Parameters:
280
precision: int, optional - Number of digits for floating point output
281
threshold: int, optional - Total array elements triggering summarization
282
edgeitems: int, optional - Number of items in summary at beginning/end
283
linewidth: int, optional - Number of characters per line for array output
284
suppress: bool, optional - Suppress small floating point values
285
nanstr: str, optional - String representation of NaN values
286
infstr: str, optional - String representation of infinity values
287
formatter: dict, optional - Custom formatting functions by data type
288
sign: str, optional - Controls printing of sign for positive values
289
floatmode: str, optional - Controls floating-point precision display
290
legacy: str or False, optional - Legacy printing mode (e.g. '1.13' for NumPy 1.13 behavior)
291
"""
292
293
def get_printoptions():
294
"""
295
Get current printing options for arrays.
296
297
Returns:
298
dict: Current print option settings
299
"""
300
301
def printoptions(**kwargs):
302
"""
303
Context manager for temporarily setting print options.
304
305
Parameters:
306
**kwargs: Print options to temporarily set
307
308
Returns:
309
context manager: Context for temporary print options
310
"""
311
```
312
313
### Advanced I/O Operations
314
315
Specialized I/O operations for complex data structures and formats.
316
317
```python { .api }
318
def genfromtxt(fname, dtype=float, comments='#', delimiter=None, skip_header=0, skip_footer=0, converters=None, missing_values=None, filling_values=None, usecols=None, names=None, excludelist=None, deletechars=None, defaultfmt="f%i", autostrip=False, replace_space='_', case_sensitive=True, unpack=None, invalid_raise=True, max_rows=None, encoding='bytes'):
319
"""
320
Load data from a text file with enhanced handling of missing values.
321
322
Parameters:
323
fname: str or file - File to read data from
324
dtype: dtype, optional - Data type of the resulting array
325
comments: str, optional - Characters indicating start of comment
326
delimiter: str, optional - String used to separate values
327
skip_header: int, optional - Number of lines to skip at beginning
328
skip_footer: int, optional - Number of lines to skip at end
329
converters: dict, optional - Dictionary mapping column to converter function
330
missing_values: variable, optional - Set of strings corresponding to missing data
331
filling_values: variable, optional - Values to use for missing data
332
usecols: sequence, optional - Which columns to read
333
names: sequence, optional - Names for the columns
334
excludelist: sequence, optional - Names to exclude from field names
335
deletechars: str, optional - Characters to remove from field names
336
defaultfmt: str, optional - Format string for field names
337
autostrip: bool, optional - Strip whitespaces from values
338
replace_space: char, optional - Character to replace spaces in field names
339
case_sensitive: bool, optional - Whether field names are case sensitive
340
unpack: bool, optional - Return data in separate variables
341
invalid_raise: bool, optional - Raise exception for inconsistent columns
342
max_rows: int, optional - Maximum number of rows to read
343
encoding: str, optional - Encoding for input file
344
345
Returns:
346
ndarray: Array constructed from text file
347
"""
348
349
class DataSource:
350
"""
351
Generic data source for reading from files, URLs, and compressed archives.
352
353
Provides a unified interface for accessing data from various sources
354
including local files, remote URLs, and compressed formats.
355
"""
356
def __init__(self, destpath='.'):
357
"""
358
Parameters:
359
destpath: str, optional - Destination path for downloaded files
360
"""
361
362
def open(self, path, mode='r', encoding=None, newline=None):
363
"""
364
Open and return file-like object for path.
365
366
Parameters:
367
path: str - Path to file or URL
368
mode: str, optional - File open mode
369
encoding: str, optional - Text encoding
370
newline: str, optional - Newline handling
371
372
Returns:
373
file-like object: Opened file or stream
374
"""
375
376
def abspath(self, path):
377
"""
378
Return absolute path of file in the DataSource directory.
379
380
Parameters:
381
path: str - File path
382
383
Returns:
384
str: Absolute path
385
"""
386
387
def exists(self, path):
388
"""
389
Test if path exists.
390
391
Parameters:
392
path: str - Path to test
393
394
Returns:
395
bool: True if path exists
396
"""
397
```
398
399
## Usage Examples
400
401
### Basic File I/O
402
403
```python
404
import cupy as cp
405
import numpy as np
406
407
# Create sample data
408
data = cp.random.rand(1000, 1000, dtype=cp.float32)
409
labels = cp.arange(1000)
410
411
# Save single array in binary format
412
cp.save('data_array.npy', data)
413
414
# Load single array
415
loaded_data = cp.load('data_array.npy')
416
print("Data loaded successfully:", cp.allclose(data, loaded_data))
417
418
# Save multiple arrays in compressed archive
419
cp.savez_compressed('dataset.npz',
420
features=data,
421
labels=labels,
422
metadata=cp.array([1000, 1000, 32]))
423
424
# Load multiple arrays from archive
425
archive = cp.load('dataset.npz')
426
print("Archive contents:", list(archive.keys()))
427
print("Features shape:", archive['features'].shape)
428
print("Labels shape:", archive['labels'].shape)
429
print("Metadata:", archive['metadata'])
430
431
# Cleanup
432
archive.close()
433
```
434
435
### Text File Operations
436
437
```python
438
# Create and save data as text
439
matrix = cp.random.rand(10, 5)
440
441
# Save with custom formatting
442
cp.savetxt('matrix_data.txt', matrix,
443
fmt='%.6f', # 6 decimal places
444
delimiter=',', # Comma-separated
445
header='Generated random matrix data',
446
comments='# ')
447
448
# Load text data
449
loaded_matrix = cp.loadtxt('matrix_data.txt',
450
delimiter=',',
451
comments='#',
452
skiprows=1) # Skip header
453
print("Text data loaded, shape:", loaded_matrix.shape)
454
455
# Advanced text loading with column selection
456
data_with_labels = cp.column_stack([matrix, cp.arange(10)])
457
cp.savetxt('labeled_data.txt', data_with_labels,
458
fmt=['%.6f'] * 5 + ['%d'], # Different formats per column
459
delimiter='\t',
460
header='col1\tcol2\tcol3\tcol4\tcol5\tlabel')
461
462
# Load specific columns
463
features_only = cp.loadtxt('labeled_data.txt',
464
delimiter='\t',
465
usecols=range(5), # First 5 columns only
466
skiprows=1)
467
468
labels_only = cp.loadtxt('labeled_data.txt',
469
delimiter='\t',
470
usecols=[5], # Last column only
471
skiprows=1,
472
dtype=int)
473
474
print("Features shape:", features_only.shape)
475
print("Labels shape:", labels_only.shape)
476
```
477
478
### Advanced Text Processing
479
480
```python
481
# Handle missing values with genfromtxt
482
sample_data = """# Sample dataset with missing values
483
1.0,2.0,3.0,A
484
4.0,,6.0,B
485
7.0,8.0,,C
486
,11.0,12.0,D
487
13.0,14.0,15.0,
488
"""
489
490
# Write sample file
491
with open('missing_data.csv', 'w') as f:
492
f.write(sample_data)
493
494
# Load with missing value handling
495
data = cp.genfromtxt('missing_data.csv',
496
delimiter=',',
497
dtype=None,
498
names=['col1', 'col2', 'col3', 'category'],
499
filling_values={'col1': 0.0, 'col2': -1.0, 'col3': 999.0},
500
encoding='utf-8')
501
502
print("Loaded structured data:")
503
print("Numeric columns:", data['col1'], data['col2'], data['col3'])
504
505
# Complex CSV processing
506
complex_csv = """# Weather data
507
Date,Temperature,Humidity,Pressure,Conditions
508
2023-01-01,15.5,65,1013.2,Sunny
509
2023-01-02,12.0,70,1015.1,Cloudy
510
2023-01-03,8.5,85,1008.7,Rainy
511
2023-01-04,18.0,55,1020.3,Clear
512
"""
513
514
with open('weather.csv', 'w') as f:
515
f.write(complex_csv)
516
517
# Load with converters for date processing
518
import datetime
519
520
def date_converter(date_string):
521
return datetime.datetime.strptime(date_string.decode(), '%Y-%m-%d').toordinal()
522
523
weather_data = cp.genfromtxt('weather.csv',
524
delimiter=',',
525
skip_header=2,
526
usecols=[1, 2, 3], # Skip date and conditions
527
names=['temp', 'humidity', 'pressure'])
528
529
print("Weather data shape:", weather_data.shape)
530
print("Temperature range:", cp.min(weather_data['temp']), "to", cp.max(weather_data['temp']))
531
```
532
533
### Binary Data Operations
534
535
```python
536
# Working with raw binary data
537
large_array = cp.random.rand(1000000).astype(cp.float32)
538
539
# Save as raw binary
540
with open('binary_data.bin', 'wb') as f:
541
f.write(large_array.tobytes())
542
543
# Load from binary file
544
loaded_binary = cp.fromfile('binary_data.bin', dtype=cp.float32)
545
print("Binary data loaded successfully:", cp.allclose(large_array, loaded_binary))
546
547
# Working with structured data
548
dt = cp.dtype([('x', cp.float32), ('y', cp.float32), ('id', cp.int32)])
549
structured_data = cp.zeros(1000, dtype=dt)
550
structured_data['x'] = cp.random.rand(1000)
551
structured_data['y'] = cp.random.rand(1000)
552
structured_data['id'] = cp.arange(1000)
553
554
# Save structured data
555
cp.save('structured_data.npy', structured_data)
556
loaded_structured = cp.load('structured_data.npy')
557
print("Structured data types:", loaded_structured.dtype)
558
print("Sample structured data:", loaded_structured[:3])
559
560
# Buffer operations
561
buffer_data = cp.arange(100, dtype=cp.int32)
562
byte_buffer = buffer_data.tobytes()
563
print("Buffer size:", len(byte_buffer), "bytes")
564
565
# Reconstruct from buffer
566
reconstructed = cp.frombuffer(byte_buffer, dtype=cp.int32)
567
print("Buffer reconstruction successful:", cp.array_equal(buffer_data, reconstructed))
568
```
569
570
### String and Buffer Conversions
571
572
```python
573
# String representations
574
array_2d = cp.random.rand(5, 5)
575
576
# Different string formats
577
print("Array string representation:")
578
print(cp.array_str(array_2d, precision=3, suppress_small=True))
579
580
print("\nArray repr (recreatable):")
581
print(cp.array_repr(array_2d, precision=3))
582
583
# Custom formatting
584
print("\nCustom formatted output:")
585
formatted = cp.array2string(array_2d,
586
precision=2,
587
separator=', ',
588
prefix=' ',
589
max_line_width=60)
590
print(formatted)
591
592
# String data conversion
593
string_data = "1.0 2.0 3.0 4.0 5.0"
594
array_from_string = cp.fromstring(string_data, sep=' ')
595
print("Array from string:", array_from_string)
596
597
# Comma-separated values
598
csv_string = "1.5,2.5,3.5,4.5"
599
csv_array = cp.fromstring(csv_string, sep=',')
600
print("Array from CSV string:", csv_array)
601
```
602
603
### Formatted Output and Display
604
605
```python
606
# Configure print options
607
original_options = cp.get_printoptions()
608
print("Original print options:", original_options)
609
610
# Set custom print options
611
cp.set_printoptions(precision=3,
612
suppress=True,
613
threshold=50,
614
edgeitems=2,
615
linewidth=80)
616
617
large_array = cp.random.rand(100, 100) * 1000
618
print("Large array with custom formatting:")
619
print(large_array)
620
621
# Temporary print options using context manager
622
with cp.printoptions(precision=8, suppress=False):
623
small_array = cp.array([1e-10, 1e-5, 1.0, 1e5, 1e10])
624
print("High precision output:")
625
print(small_array)
626
627
print("Back to custom formatting:")
628
print(small_array)
629
630
# Float formatting examples
631
values = [cp.pi, cp.e, 1.23456789e-8, 1.23456789e8]
632
633
for val in values:
634
positional = cp.format_float_positional(val, precision=4)
635
scientific = cp.format_float_scientific(val, precision=4)
636
print(f"Value: {val}")
637
print(f" Positional: {positional}")
638
print(f" Scientific: {scientific}")
639
640
# Restore original options
641
cp.set_printoptions(**original_options)
642
```
643
644
### Data Source and URL Operations
645
646
```python
647
# Using DataSource for flexible file access
648
datasource = cp.DataSource('data_cache')
649
650
# Example with local file
651
if datasource.exists('sample.txt'):
652
with datasource.open('sample.txt', 'r') as f:
653
content = f.read()
654
print("File content:", content[:100])
655
656
# Working with compressed files (conceptual example)
657
compressed_data = """
658
# This would typically be loaded from a .gz, .bz2, or .xz file
659
# DataSource automatically handles decompression
660
"""
661
662
# Advanced file format detection and handling
663
def load_flexible_format(filename):
664
"""Load data from various formats automatically."""
665
if filename.endswith('.npy'):
666
return cp.load(filename)
667
elif filename.endswith('.npz'):
668
archive = cp.load(filename)
669
# Return first array if single array, otherwise return dict
670
keys = list(archive.keys())
671
if len(keys) == 1:
672
return archive[keys[0]]
673
return dict(archive)
674
elif filename.endswith('.txt') or filename.endswith('.csv'):
675
# Try to detect delimiter
676
with open(filename, 'r') as f:
677
first_line = f.readline()
678
if ',' in first_line:
679
delimiter = ','
680
elif '\t' in first_line:
681
delimiter = '\t'
682
else:
683
delimiter = None
684
return cp.loadtxt(filename, delimiter=delimiter)
685
else:
686
        raise ValueError(f"Unknown file format: {filename}")
687
688
# Performance-optimized I/O
689
def efficient_large_file_processing(filename, chunk_size=10000):
690
"""Process large files in chunks to manage memory."""
691
results = []
692
693
# For very large files, process in chunks
694
    with open(filename) as f:
        total_lines = sum(1 for _ in f)
695
chunks = (total_lines + chunk_size - 1) // chunk_size
696
697
for i in range(chunks):
698
skip_rows = i * chunk_size
699
max_rows = min(chunk_size, total_lines - skip_rows)
700
701
chunk_data = cp.loadtxt(filename,
702
skiprows=skip_rows,
703
max_rows=max_rows)
704
705
# Process chunk
706
processed_chunk = cp.mean(chunk_data, axis=1) # Example processing
707
results.append(processed_chunk)
708
709
return cp.concatenate(results)
710
```
711
712
### Memory-Efficient I/O Patterns
713
714
```python
715
# Memory mapping for large files
716
def process_large_dataset_efficiently(filename):
717
"""Process large datasets without loading entirely into GPU memory."""
718
719
# Load metadata first
720
with open(filename, 'r') as f:
721
header = f.readline()
722
sample_line = f.readline()
723
724
# Determine dimensions and data type
725
n_cols = len(sample_line.split(','))
726
727
# Process in batches
728
batch_size = 50000
729
batch_results = []
730
731
skip_rows = 1 # Skip header
732
while True:
733
try:
734
batch = cp.loadtxt(filename,
735
delimiter=',',
736
skiprows=skip_rows,
737
max_rows=batch_size)
738
739
if batch.size == 0:
740
break
741
742
# Process batch on GPU
743
batch_result = cp.sum(batch ** 2, axis=1)
744
batch_results.append(batch_result)
745
746
skip_rows += batch_size
747
748
except Exception as e:
749
print(f"Finished processing: {e}")
750
break
751
752
return cp.concatenate(batch_results) if batch_results else cp.array([])
753
754
# Streaming data processing
755
class StreamingDataProcessor:
756
def __init__(self, output_file):
757
self.output_file = output_file
758
self.processed_count = 0
759
760
def process_stream(self, data_generator):
761
"""Process streaming data and save incrementally."""
762
with open(self.output_file, 'w') as f:
763
f.write("# Processed streaming data\n")
764
765
for batch in data_generator:
766
# Convert to GPU, process, convert back
767
gpu_batch = cp.asarray(batch)
768
processed = cp.sqrt(cp.sum(gpu_batch ** 2, axis=1))
769
cpu_result = cp.asnumpy(processed)
770
771
# Save incrementally
772
cp.savetxt(f, cpu_result.reshape(-1, 1), fmt='%.6f')
773
self.processed_count += len(cpu_result)
774
775
print(f"Processed {self.processed_count} items")
776
777
# Usage with generator
778
def data_generator(total_items=100000, batch_size=1000):
779
"""Generate batches of synthetic data."""
780
import numpy as np
781
for i in range(0, total_items, batch_size):
782
current_batch_size = min(batch_size, total_items - i)
783
yield np.random.rand(current_batch_size, 10)
784
785
# Process streaming data
786
processor = StreamingDataProcessor('streaming_output.txt')
787
processor.process_stream(data_generator())
788
```
789
790
Input/output operations in CuPy provide comprehensive data exchange capabilities between GPU arrays and external storage systems, supporting various file formats, text processing, binary data handling, and memory-efficient processing patterns for large datasets while maintaining high performance and compatibility with NumPy I/O interfaces.