0
# Input/Output Operations
1
2
CuPy provides comprehensive input/output operations for reading, writing, and formatting array data across various file formats and data sources. These operations enable efficient data exchange between GPU arrays and external storage systems, supporting both binary and text formats with optimized performance for large datasets.
3
4
## Capabilities
5
6
### File I/O Operations
7
8
Core file input and output operations for saving and loading CuPy arrays in various formats.
9
10
```python { .api }
11
def save(file, arr):
12
"""
13
Save an array to a binary file in NumPy .npy format.
14
15
Parameters:
16
file: str or file - File name or file object to save to
17
arr: array_like - Array to save
18
"""
19
20
def load(file, mmap_mode=None, allow_pickle=False, fix_imports=True, encoding='ASCII'):
21
"""
22
Load arrays or pickled objects from .npy, .npz or pickled files.
23
24
Parameters:
25
file: str or file - File name or file object to load from
26
mmap_mode: None or str, optional - Memory mapping mode
27
allow_pickle: bool, optional - Allow loading pickled object arrays (default False for security; required for object arrays)
28
fix_imports: bool, optional - Fix Python 2/3 import compatibility
29
encoding: str, optional - Encoding used for reading Python 2 strings
30
31
Returns:
32
ndarray or dict: Loaded array data
33
"""
34
35
def savez(file, *args, **kwds):
36
"""
37
Save several arrays into a single file in uncompressed .npz format.
38
39
Parameters:
40
file: str or file - Output file name or file object
41
*args: array_like - Arrays to save (saved with automatic names arr_0, arr_1, etc.)
42
**kwds: array_like - Arrays to save with specified names
43
"""
44
45
def savez_compressed(file, *args, **kwds):
46
"""
47
Save several arrays into a single file in compressed .npz format.
48
49
Parameters:
50
file: str or file - Output file name or file object
51
*args: array_like - Arrays to save with automatic names
52
**kwds: array_like - Arrays to save with specified names
53
"""
54
55
def loadtxt(fname, dtype=float, comments='#', delimiter=None, converters=None, skiprows=0, usecols=None, unpack=False, ndmin=0, encoding='bytes', max_rows=None):
56
"""
57
Load data from a text file.
58
59
Parameters:
60
fname: str or file - File name or file object to read from
61
dtype: data-type, optional - Data type of the resulting array
62
comments: str or sequence, optional - Characters used to indicate comments
63
delimiter: str, optional - String used to separate values
64
converters: dict, optional - Dictionary mapping column number to conversion function
65
skiprows: int, optional - Skip the first skiprows lines including comments
66
usecols: int or sequence, optional - Which columns to read
67
unpack: bool, optional - If True, return data in separate arrays
68
ndmin: int, optional - Minimum number of dimensions for returned array
69
encoding: str, optional - Encoding used to decode the input file
70
max_rows: int, optional - Read max_rows lines of content after skiprows
71
72
Returns:
73
ndarray: Data read from the text file
74
"""
75
76
def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='', footer='', comments='# ', encoding=None):
77
"""
78
Save an array to a text file.
79
80
Parameters:
81
fname: str or file - File name or file object to write to
82
X: 1-D or 2-D array_like - Data to be saved
83
fmt: str or sequence of str, optional - Format specification
84
delimiter: str, optional - String separating columns
85
newline: str, optional - String separating lines
86
header: str, optional - Header text at the beginning of the file
87
footer: str, optional - Footer text at the end of the file
88
comments: str, optional - String prefix for header and footer comments
89
encoding: str, optional - Encoding used for writing text files
90
"""
91
92
def fromfile(file, dtype=float, count=-1, sep='', offset=0):
93
"""
94
Construct an array from data in a text or binary file.
95
96
Parameters:
97
file: str or file - Open file object or filename
98
dtype: data-type, optional - Data type of the returned array
99
count: int, optional - Number of items to read (-1 means all data)
100
sep: str, optional - Separator between items for text files
101
offset: int, optional - Offset in bytes from the file's current position
102
103
Returns:
104
ndarray: Array constructed from file data
105
"""
106
107
def tofile(arr, fid, sep="", format="%s"):
108
"""
109
Write array to a file as text or binary (default).
110
111
Parameters:
112
arr: ndarray - Array to write to file
113
fid: str or file - Output file name or open file object
114
sep: str, optional - Separator between array items for text output
115
format: str, optional - Format string for text output
116
"""
117
```
118
119
### String and Buffer Operations
120
121
Operations for converting arrays to and from string and buffer representations.
122
123
```python { .api }
124
def fromstring(string, dtype=float, count=-1, sep=''):
125
"""
126
Create an array from string data.
127
128
Parameters:
129
string: str - String containing array data
130
dtype: data-type, optional - Data type of the returned array
131
count: int, optional - Number of items to read from string
132
sep: str, optional - String separator between items
133
134
Returns:
135
ndarray: Array created from string data
136
"""
137
138
def tostring(arr, order='C'):
139
"""
140
Return array data as a string containing the raw bytes.
141
142
Parameters:
143
arr: ndarray - Input array
144
order: {'C', 'F', 'A'}, optional - Order of data bytes
145
146
Returns:
147
bytes: Raw bytes of array data
148
"""
149
150
def frombuffer(buffer, dtype=float, count=-1, offset=0):
151
"""
152
Interpret a buffer as a 1-dimensional array.
153
154
Parameters:
155
buffer: buffer_like - Object exposing buffer interface
156
dtype: data-type, optional - Data type of returned array
157
count: int, optional - Number of items to read from buffer
158
offset: int, optional - Start reading buffer from this offset
159
160
Returns:
161
ndarray: 1-D array from buffer data
162
"""
163
164
def tobytes(arr, order='C'):
165
"""
166
Return array data as bytes.
167
168
Parameters:
169
arr: ndarray - Input array
170
order: {'C', 'F', 'A'}, optional - Order of data bytes
171
172
Returns:
173
bytes: Array data as bytes
174
"""
175
```
176
177
### Formatted Text Output
178
179
Functions for creating formatted string representations of arrays for display and debugging.
180
181
```python { .api }
182
def array_str(a, max_line_width=None, precision=None, suppress_small=None):
183
"""
184
Return a string representation of an array.
185
186
Parameters:
187
a: ndarray - Input array
188
max_line_width: int, optional - Maximum characters per line
189
precision: int, optional - Floating point precision
190
suppress_small: bool, optional - Suppress small floating point values
191
192
Returns:
193
str: String representation of the array
194
"""
195
196
def array_repr(arr, max_line_width=None, precision=None, suppress_small=None):
197
"""
198
Return string representation of an array that can recreate the array.
199
200
Parameters:
201
arr: ndarray - Input array
202
max_line_width: int, optional - Maximum characters per line
203
precision: int, optional - Floating point precision
204
suppress_small: bool, optional - Suppress small floating point values
205
206
Returns:
207
str: String representation with constructor format
208
"""
209
210
def array2string(a, max_line_width=None, precision=None, suppress_small=None, separator=' ', prefix="", style=repr, formatter=None, threshold=None, edgeitems=None, sign=None, floatmode=None, suffix="", legacy=None):
211
"""
212
Return a string representation of an array with full control over formatting.
213
214
Parameters:
215
a: ndarray - Input array
216
max_line_width: int, optional - Maximum characters per line
217
precision: int, optional - Floating point precision
218
suppress_small: bool, optional - Suppress small floating point values
219
separator: str, optional - Separator between array elements
220
prefix: str, optional - Prefix string for each line except first
221
style: function, optional - Function to format scalar values
222
formatter: dict, optional - Dictionary of formatting functions by type
223
threshold: int, optional - Total array elements triggering summarization
224
edgeitems: int, optional - Number of items in summary at beginning/end
225
sign: str, optional - Controls sign printing ('-', '+', ' ')
226
floatmode: str, optional - Controls floating-point precision display
227
suffix: str, optional - Suffix string for each line except last
228
legacy: str or False, optional - Legacy printing mode (e.g. '1.13')
229
230
Returns:
231
str: Formatted string representation
232
"""
233
234
def format_float_positional(x, precision=None, unique=True, fractional=True, trim='k', sign=False, pad_left=None, pad_right=None):
235
"""
236
Format a floating-point scalar as a decimal string in positional notation.
237
238
Parameters:
239
x: float - Value to format
240
precision: int, optional - Maximum number of digits to print
241
unique: bool, optional - Use unique formatting that preserves value
242
fractional: bool, optional - Use fractional formatting
243
trim: str, optional - Trimming method ('k', '0', '.')
244
sign: bool, optional - Force sign display
245
pad_left: int, optional - Pad to this many characters on left
246
pad_right: int, optional - Pad to this many characters on right
247
248
Returns:
249
str: Formatted float string
250
"""
251
252
def format_float_scientific(x, precision=None, unique=True, trim='k', sign=False, pad_left=None, exp_digits=None):
253
"""
254
Format a floating-point scalar as a decimal string in scientific notation.
255
256
Parameters:
257
x: float - Value to format
258
precision: int, optional - Maximum number of digits to print
259
unique: bool, optional - Use unique formatting that preserves value
260
trim: str, optional - Trimming method ('k', '0', '.')
261
sign: bool, optional - Force sign display
262
pad_left: int, optional - Pad to this many characters on left
263
exp_digits: int, optional - Number of digits in exponent
264
265
Returns:
266
str: Formatted float string in scientific notation
267
"""
268
```
269
270
### Print Options and Configuration
271
272
Functions for configuring array printing and display options.
273
274
```python { .api }
275
def set_printoptions(precision=None, threshold=None, edgeitems=None, linewidth=None, suppress=None, nanstr=None, infstr=None, formatter=None, sign=None, floatmode=None, legacy=None):
276
"""
277
Set printing options for arrays.
278
279
Parameters:
280
precision: int, optional - Number of digits for floating point output
281
threshold: int, optional - Total array elements triggering summarization
282
edgeitems: int, optional - Number of items in summary at beginning/end
283
linewidth: int, optional - Number of characters per line for array output
284
suppress: bool, optional - Suppress small floating point values
285
nanstr: str, optional - String representation of NaN values
286
infstr: str, optional - String representation of infinity values
287
formatter: dict, optional - Custom formatting functions by data type
288
sign: str, optional - Controls printing of sign for positive values
289
floatmode: str, optional - Controls floating-point precision display
290
legacy: str or False, optional - Legacy printing mode (e.g. '1.13' for NumPy 1.13 behavior)
291
"""
292
293
def get_printoptions():
294
"""
295
Get current printing options for arrays.
296
297
Returns:
298
dict: Current print option settings
299
"""
300
301
def printoptions(**kwargs):
302
"""
303
Context manager for temporarily setting print options.
304
305
Parameters:
306
**kwargs: Print options to temporarily set
307
308
Returns:
309
context manager: Context for temporary print options
310
"""
311
```
312
313
### Advanced I/O Operations
314
315
Specialized I/O operations for complex data structures and formats.
316
317
```python { .api }
318
def genfromtxt(fname, dtype=float, comments='#', delimiter=None, skip_header=0, skip_footer=0, converters=None, missing_values=None, filling_values=None, usecols=None, names=None, excludelist=None, deletechars=None, defaultfmt="f%i", autostrip=False, replace_space='_', case_sensitive=True, unpack=None, invalid_raise=True, max_rows=None, encoding='bytes'):
319
"""
320
Load data from a text file with enhanced handling of missing values.
321
322
Parameters:
323
fname: str or file - File to read data from
324
dtype: dtype, optional - Data type of the resulting array
325
comments: str, optional - Characters indicating start of comment
326
delimiter: str, optional - String used to separate values
327
skip_header: int, optional - Number of lines to skip at beginning
328
skip_footer: int, optional - Number of lines to skip at end
329
converters: dict, optional - Dictionary mapping column to converter function
330
missing_values: variable, optional - Set of strings corresponding to missing data
331
filling_values: variable, optional - Values to use for missing data
332
usecols: sequence, optional - Which columns to read
333
names: sequence, optional - Names for the columns
334
excludelist: sequence, optional - Names to exclude from field names
335
deletechars: str, optional - Characters to remove from field names
336
defaultfmt: str, optional - Format string for field names
337
autostrip: bool, optional - Strip whitespaces from values
338
replace_space: char, optional - Character to replace spaces in field names
339
case_sensitive: bool, optional - Whether field names are case sensitive
340
unpack: bool, optional - Return data in separate variables
341
invalid_raise: bool, optional - Raise exception for inconsistent columns
342
max_rows: int, optional - Maximum number of rows to read
343
encoding: str, optional - Encoding for input file
344
345
Returns:
346
ndarray: Array constructed from text file
347
"""
348
349
class DataSource:
350
"""
351
Generic data source for reading from files, URLs, and compressed archives.
352
353
Provides a unified interface for accessing data from various sources
354
including local files, remote URLs, and compressed formats.
355
"""
356
def __init__(self, destpath='.'):
357
"""
358
Parameters:
359
destpath: str, optional - Destination path for downloaded files
360
"""
361
362
def open(self, path, mode='r', encoding=None, newline=None):
363
"""
364
Open and return file-like object for path.
365
366
Parameters:
367
path: str - Path to file or URL
368
mode: str, optional - File open mode
369
encoding: str, optional - Text encoding
370
newline: str, optional - Newline handling
371
372
Returns:
373
file-like object: Opened file or stream
374
"""
375
376
def abspath(self, path):
377
"""
378
Return absolute path of file in the DataSource directory.
379
380
Parameters:
381
path: str - File path
382
383
Returns:
384
str: Absolute path
385
"""
386
387
def exists(self, path):
388
"""
389
Test if path exists.
390
391
Parameters:
392
path: str - Path to test
393
394
Returns:
395
bool: True if path exists
396
"""
397
```
398
399
## Usage Examples
400
401
### Basic File I/O
402
403
```python
404
import cupy as cp
405
import numpy as np
406
407
# Create sample data
408
data = cp.random.rand(1000, 1000, dtype=cp.float32)
409
labels = cp.arange(1000)
410
411
# Save single array in binary format
412
cp.save('data_array.npy', data)
413
414
# Load single array
415
loaded_data = cp.load('data_array.npy')
416
print("Data loaded successfully:", cp.allclose(data, loaded_data))
417
418
# Save multiple arrays in compressed archive
419
cp.savez_compressed('dataset.npz',
420
features=data,
421
labels=labels,
422
metadata=cp.array([1000, 1000, 32]))
423
424
# Load multiple arrays from archive
425
archive = cp.load('dataset.npz')
426
print("Archive contents:", list(archive.keys()))
427
print("Features shape:", archive['features'].shape)
428
print("Labels shape:", archive['labels'].shape)
429
print("Metadata:", archive['metadata'])
430
431
# Cleanup
432
archive.close()
433
```
434
435
### Text File Operations
436
437
```python
438
# Create and save data as text
439
matrix = cp.random.rand(10, 5)
440
441
# Save with custom formatting
442
cp.savetxt('matrix_data.txt', matrix,
443
fmt='%.6f', # 6 decimal places
444
delimiter=',', # Comma-separated
445
header='Generated random matrix data',
446
comments='# ')
447
448
# Load text data
449
loaded_matrix = cp.loadtxt('matrix_data.txt',
450
delimiter=',',
451
comments='#',
452
skiprows=1) # Skip header
453
print("Text data loaded, shape:", loaded_matrix.shape)
454
455
# Advanced text loading with column selection
456
data_with_labels = cp.column_stack([matrix, cp.arange(10)])
457
cp.savetxt('labeled_data.txt', data_with_labels,
458
fmt=['%.6f'] * 5 + ['%d'], # Different formats per column
459
delimiter='\t',
460
header='col1\tcol2\tcol3\tcol4\tcol5\tlabel')
461
462
# Load specific columns
463
features_only = cp.loadtxt('labeled_data.txt',
464
delimiter='\t',
465
usecols=range(5), # First 5 columns only
466
skiprows=1)
467
468
labels_only = cp.loadtxt('labeled_data.txt',
469
delimiter='\t',
470
usecols=[5], # Last column only
471
skiprows=1,
472
dtype=int)
473
474
print("Features shape:", features_only.shape)
475
print("Labels shape:", labels_only.shape)
476
```
477
478
### Advanced Text Processing
479
480
```python
481
# Handle missing values with genfromtxt
482
sample_data = """# Sample dataset with missing values
483
1.0,2.0,3.0,A
484
4.0,,6.0,B
485
7.0,8.0,,C
486
,11.0,12.0,D
487
13.0,14.0,15.0,
488
"""
489
490
# Write sample file
491
with open('missing_data.csv', 'w') as f:
492
f.write(sample_data)
493
494
# Load with missing value handling
495
data = cp.genfromtxt('missing_data.csv',
496
delimiter=',',
497
dtype=None,
498
names=['col1', 'col2', 'col3', 'category'],
499
filling_values={'col1': 0.0, 'col2': -1.0, 'col3': 999.0},
500
encoding='utf-8')
501
502
print("Loaded structured data:")
503
print("Numeric columns:", data['col1'], data['col2'], data['col3'])
504
505
# Complex CSV processing
506
complex_csv = """# Weather data
507
Date,Temperature,Humidity,Pressure,Conditions
508
2023-01-01,15.5,65,1013.2,Sunny
509
2023-01-02,12.0,70,1015.1,Cloudy
510
2023-01-03,8.5,85,1008.7,Rainy
511
2023-01-04,18.0,55,1020.3,Clear
512
"""
513
514
with open('weather.csv', 'w') as f:
515
f.write(complex_csv)
516
517
# Load with converters for date processing
518
import datetime
519
520
def date_converter(date_string):
521
return datetime.datetime.strptime(date_string.decode(), '%Y-%m-%d').toordinal()
522
523
weather_data = cp.genfromtxt('weather.csv',
524
delimiter=',',
525
skip_header=2,
526
usecols=[1, 2, 3], # Skip date and conditions
527
names=['temp', 'humidity', 'pressure'])
528
529
print("Weather data shape:", weather_data.shape)
530
print("Temperature range:", cp.min(weather_data['temp']), "to", cp.max(weather_data['temp']))
531
```
532
533
### Binary Data Operations
534
535
```python
536
# Working with raw binary data
537
large_array = cp.random.rand(1000000).astype(cp.float32)
538
539
# Save as raw binary
540
with open('binary_data.bin', 'wb') as f:
541
f.write(large_array.tobytes())
542
543
# Load from binary file
544
loaded_binary = cp.fromfile('binary_data.bin', dtype=cp.float32)
545
print("Binary data loaded successfully:", cp.allclose(large_array, loaded_binary))
546
547
# Working with structured data
548
dt = cp.dtype([('x', cp.float32), ('y', cp.float32), ('id', cp.int32)])
549
structured_data = cp.zeros(1000, dtype=dt)
550
structured_data['x'] = cp.random.rand(1000)
551
structured_data['y'] = cp.random.rand(1000)
552
structured_data['id'] = cp.arange(1000)
553
554
# Save structured data
555
cp.save('structured_data.npy', structured_data)
556
loaded_structured = cp.load('structured_data.npy')
557
print("Structured data types:", loaded_structured.dtype)
558
print("Sample structured data:", loaded_structured[:3])
559
560
# Buffer operations
561
buffer_data = cp.arange(100, dtype=cp.int32)
562
byte_buffer = buffer_data.tobytes()
563
print("Buffer size:", len(byte_buffer), "bytes")
564
565
# Reconstruct from buffer
566
reconstructed = cp.frombuffer(byte_buffer, dtype=cp.int32)
567
print("Buffer reconstruction successful:", cp.array_equal(buffer_data, reconstructed))
568
```
569
570
### String and Buffer Conversions
571
572
```python
573
# String representations
574
array_2d = cp.random.rand(5, 5)
575
576
# Different string formats
577
print("Array string representation:")
578
print(cp.array_str(array_2d, precision=3, suppress_small=True))
579
580
print("\nArray repr (recreatable):")
581
print(cp.array_repr(array_2d, precision=3))
582
583
# Custom formatting
584
print("\nCustom formatted output:")
585
formatted = cp.array2string(array_2d,
586
precision=2,
587
separator=', ',
588
prefix=' ',
589
max_line_width=60)
590
print(formatted)
591
592
# String data conversion
593
string_data = "1.0 2.0 3.0 4.0 5.0"
594
array_from_string = cp.fromstring(string_data, sep=' ')
595
print("Array from string:", array_from_string)
596
597
# Comma-separated values
598
csv_string = "1.5,2.5,3.5,4.5"
599
csv_array = cp.fromstring(csv_string, sep=',')
600
print("Array from CSV string:", csv_array)
601
```
602
603
### Formatted Output and Display
604
605
```python
606
# Configure print options
607
original_options = cp.get_printoptions()
608
print("Original print options:", original_options)
609
610
# Set custom print options
611
cp.set_printoptions(precision=3,
612
suppress=True,
613
threshold=50,
614
edgeitems=2,
615
linewidth=80)
616
617
large_array = cp.random.rand(100, 100) * 1000
618
print("Large array with custom formatting:")
619
print(large_array)
620
621
# Temporary print options using context manager
622
with cp.printoptions(precision=8, suppress=False):
623
small_array = cp.array([1e-10, 1e-5, 1.0, 1e5, 1e10])
624
print("High precision output:")
625
print(small_array)
626
627
print("Back to custom formatting:")
628
print(small_array)
629
630
# Float formatting examples
631
values = [cp.pi, cp.e, 1.23456789e-8, 1.23456789e8]
632
633
for val in values:
634
positional = cp.format_float_positional(val, precision=4)
635
scientific = cp.format_float_scientific(val, precision=4)
636
print(f"Value: {val}")
637
print(f" Positional: {positional}")
638
print(f" Scientific: {scientific}")
639
640
# Restore original options
641
cp.set_printoptions(**original_options)
642
```
643
644
### Data Source and URL Operations
645
646
```python
647
# Using DataSource for flexible file access
648
datasource = cp.DataSource('data_cache')
649
650
# Example with local file
651
if datasource.exists('sample.txt'):
652
with datasource.open('sample.txt', 'r') as f:
653
content = f.read()
654
print("File content:", content[:100])
655
656
# Working with compressed files (conceptual example)
657
compressed_data = """
658
# This would typically be loaded from a .gz, .bz2, or .xz file
659
# DataSource automatically handles decompression
660
"""
661
662
# Advanced file format detection and handling
663
def load_flexible_format(filename):
664
"""Load data from various formats automatically."""
665
if filename.endswith('.npy'):
666
return cp.load(filename)
667
elif filename.endswith('.npz'):
668
archive = cp.load(filename)
669
# Return first array if single array, otherwise return dict
670
keys = list(archive.keys())
671
if len(keys) == 1:
672
return archive[keys[0]]
673
return dict(archive)
674
elif filename.endswith('.txt') or filename.endswith('.csv'):
675
# Try to detect delimiter
676
with open(filename, 'r') as f:
677
first_line = f.readline()
678
if ',' in first_line:
679
delimiter = ','
680
elif '\t' in first_line:
681
delimiter = '\t'
682
else:
683
delimiter = None
684
return cp.loadtxt(filename, delimiter=delimiter)
685
else:
686
        raise ValueError(f"Unknown file format: {filename}")
687
688
# Performance-optimized I/O
689
def efficient_large_file_processing(filename, chunk_size=10000):
690
"""Process large files in chunks to manage memory."""
691
results = []
692
693
# For very large files, process in chunks
694
    with open(filename) as f:
        total_lines = sum(1 for _ in f)
695
chunks = (total_lines + chunk_size - 1) // chunk_size
696
697
for i in range(chunks):
698
skip_rows = i * chunk_size
699
max_rows = min(chunk_size, total_lines - skip_rows)
700
701
chunk_data = cp.loadtxt(filename,
702
skiprows=skip_rows,
703
max_rows=max_rows)
704
705
# Process chunk
706
processed_chunk = cp.mean(chunk_data, axis=1) # Example processing
707
results.append(processed_chunk)
708
709
return cp.concatenate(results)
710
```
711
712
### Memory-Efficient I/O Patterns
713
714
```python
715
# Memory mapping for large files
716
def process_large_dataset_efficiently(filename):
717
"""Process large datasets without loading entirely into GPU memory."""
718
719
# Load metadata first
720
with open(filename, 'r') as f:
721
header = f.readline()
722
sample_line = f.readline()
723
724
# Determine dimensions and data type
725
n_cols = len(sample_line.split(','))
726
727
# Process in batches
728
batch_size = 50000
729
batch_results = []
730
731
skip_rows = 1 # Skip header
732
while True:
733
try:
734
batch = cp.loadtxt(filename,
735
delimiter=',',
736
skiprows=skip_rows,
737
max_rows=batch_size)
738
739
if batch.size == 0:
740
break
741
742
# Process batch on GPU
743
batch_result = cp.sum(batch ** 2, axis=1)
744
batch_results.append(batch_result)
745
746
skip_rows += batch_size
747
748
except Exception as e:
749
print(f"Finished processing: {e}")
750
break
751
752
return cp.concatenate(batch_results) if batch_results else cp.array([])
753
754
# Streaming data processing
755
class StreamingDataProcessor:
756
def __init__(self, output_file):
757
self.output_file = output_file
758
self.processed_count = 0
759
760
def process_stream(self, data_generator):
761
"""Process streaming data and save incrementally."""
762
with open(self.output_file, 'w') as f:
763
f.write("# Processed streaming data\n")
764
765
for batch in data_generator:
766
# Convert to GPU, process, convert back
767
gpu_batch = cp.asarray(batch)
768
processed = cp.sqrt(cp.sum(gpu_batch ** 2, axis=1))
769
cpu_result = cp.asnumpy(processed)
770
771
# Save incrementally
772
cp.savetxt(f, cpu_result.reshape(-1, 1), fmt='%.6f')
773
self.processed_count += len(cpu_result)
774
775
print(f"Processed {self.processed_count} items")
776
777
# Usage with generator
778
def data_generator(total_items=100000, batch_size=1000):
779
"""Generate batches of synthetic data."""
780
import numpy as np
781
for i in range(0, total_items, batch_size):
782
current_batch_size = min(batch_size, total_items - i)
783
yield np.random.rand(current_batch_size, 10)
784
785
# Process streaming data
786
processor = StreamingDataProcessor('streaming_output.txt')
787
processor.process_stream(data_generator())
788
```
789
790
Input/output operations in CuPy provide comprehensive data exchange capabilities between GPU arrays and external storage systems, supporting various file formats, text processing, binary data handling, and memory-efficient processing patterns for large datasets while maintaining high performance and compatibility with NumPy I/O interfaces.