CuPy: a NumPy- and SciPy-compatible GPU-accelerated computing library for CUDA 11.2 environments.

File I/O operations for saving and loading arrays in various formats, including NumPy's binary formats (.npy, .npz) and text formats, enabling data persistence and interoperability between CuPy and NumPy.

NumPy-compatible binary file operations for efficient array storage.
def save(file, arr, allow_pickle=True, fix_imports=True):
    """Save an array to a binary file in NumPy ``.npy`` format.

    Parameters:
    - file: str or file-like, output file path or object
    - arr: array-like, array to save
    - allow_pickle: bool, allow pickling object arrays
    - fix_imports: bool, fix Python 2/3 pickle compatibility
    """
def load(file, mmap_mode=None, allow_pickle=False, fix_imports=True, encoding='ASCII'):
    """Load an array from a ``.npy`` file.

    Parameters:
    - file: str or file-like, input file path or object
    - mmap_mode: str, memory mapping mode (None, 'r+', 'r', 'w+', 'c')
    - allow_pickle: bool, allow loading pickled object arrays
    - fix_imports: bool, fix Python 2/3 pickle compatibility
    - encoding: str, encoding used for Python 2 compatibility

    Returns:
    - cupy.ndarray, loaded array
    """
def savez(file, *args, **kwds):
    """Save multiple arrays into a single uncompressed ``.npz`` archive.

    Parameters:
    - file: str or file-like, output file path
    - args: arrays saved under auto-generated names
    - kwds: arrays saved under the given keyword names
    """
def savez_compressed(file, *args, **kwds):
    """Save multiple arrays into a single compressed ``.npz`` archive.

    Parameters:
    - file: str or file-like, output file path
    - args: arrays saved under auto-generated names
    - kwds: arrays saved under the given keyword names
    """


# Text-based file operations for human-readable array storage.
# Fixed: the default line separator is the actual newline character '\n',
# not the two-character literal backslash-n that the mangled source showed.
def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='', footer='', comments='# ', encoding=None):
    """Save an array to a text file.

    Parameters:
    - fname: str or file-like, output file name or object
    - X: array-like, 1-D or 2-D array to save
    - fmt: str or sequence, format string(s) for numbers
    - delimiter: str, column separator
    - newline: str, line separator
    - header: str, text written at the top of the file
    - footer: str, text written at the end of the file
    - comments: str, prefix prepended to header/footer lines
    - encoding: str, text encoding
    """
def loadtxt(fname, dtype=float, comments='#', delimiter=None, converters=None, skiprows=0, usecols=None, unpack=False, ndmin=0, encoding='bytes', max_rows=None):
    """Load data from a text file.

    Parameters:
    - fname: str or file-like, input file name or object
    - dtype: data type, output array type
    - comments: str or sequence, comment prefixes
    - delimiter: str, column separator
    - converters: dict, per-column converter functions
    - skiprows: int, number of leading rows to skip
    - usecols: int or sequence, columns to read
    - unpack: bool, unpack columns into separate arrays
    - ndmin: int, minimum number of dimensions of the result
    - max_rows: int, maximum number of rows to read
    - encoding: str, text encoding

    Returns:
    - cupy.ndarray, loaded array
    """
def genfromtxt(fname, dtype=float, comments='#', delimiter=None, skip_header=0, skip_footer=0, converters=None, missing_values=None, filling_values=None, usecols=None, names=None, excludelist=None, deletechars=None, defaultfmt='f%i', autostrip=False, replace_space='_', case_sensitive=True, unpack=None, invalid_raise=True, max_rows=None, encoding='bytes'):
    """Load data from a text file, with handling for missing values.

    Parameters:
    - fname: str or file-like, input file
    - dtype: data type, output type
    - comments: str, comment prefix
    - delimiter: str, column separator
    - skip_header: int, header lines to skip
    - skip_footer: int, footer lines to skip
    - converters: dict, per-column converter functions
    - missing_values: str or dict, markers that denote missing data
    - filling_values: scalar or dict, values substituted for missing data
    - usecols: sequence, columns to use
    - names: bool or sequence, field names
    - excludelist: sequence, names to exclude
    - deletechars: str, characters stripped from field names
    - defaultfmt: str, format used for auto-generated field names
    - autostrip: bool, automatically strip whitespace from values
    - replace_space: str, replacement for spaces in field names
    - case_sensitive: bool, whether field names are case sensitive
    - unpack: bool, unpack columns into separate arrays
    - invalid_raise: bool, raise on rows with an inconsistent column count
    - max_rows: int, maximum number of rows to read
    - encoding: str, text encoding

    Returns:
    - cupy.ndarray, loaded array; has a structured dtype if ``names`` is given
    """


# Functions for converting arrays to string representations.
def array_repr(arr, max_line_width=None, precision=None, suppress_small=None):
    """Return the string representation of an array (``repr`` style).

    Parameters:
    - arr: ndarray, input array
    - max_line_width: int, maximum characters per line
    - precision: int, floating point precision
    - suppress_small: bool, print numbers close to zero as zero

    Returns:
    - str, string representation
    """
def array_str(a, max_line_width=None, precision=None, suppress_small=None):
    """Return a string representation of the data in an array.

    Parameters:
    - a: ndarray, input array
    - max_line_width: int, maximum characters per line
    - precision: int, floating point precision
    - suppress_small: bool, print numbers close to zero as zero

    Returns:
    - str, string representation of the array data
    """
# Fixed: the original source fused "import cupy as cp" onto the docstring's
# closing line, breaking the syntax; that import belongs to the example
# section that follows and is restored there.
def array2string(a, max_line_width=None, precision=None, suppress_small=None, separator=' ', prefix="", style=repr, formatter=None, threshold=None, edgeitems=None, sign=None, floatmode=None, suffix="", **kwarg):
    """Return a string representation of an array with full formatting control.

    Parameters:
    - a: ndarray, input array
    - max_line_width: int, maximum characters per line
    - precision: int, floating point precision
    - suppress_small: bool, print numbers close to zero as zero
    - separator: str, element separator
    - prefix: str, prefix string
    - style: callable, formatting function
    - formatter: dict, custom per-type formatters
    - threshold: int, total element count that triggers summarization
    - edgeitems: int, items shown at each edge when summarized
    - sign: str, sign handling ('-', '+', ' ')
    - floatmode: str, float formatting mode
    - suffix: str, suffix string

    Returns:
    - str, formatted string representation
    """
# Example: saving and loading arrays in NumPy binary formats.
import cupy as cp
import numpy as np

# Create test data.
data = cp.random.random((1000, 1000))
labels = cp.arange(1000)
# CuPy does not support string dtypes, so keep text metadata in a NumPy array.
metadata = np.array(['sample_' + str(i) for i in range(100)])

# Save a single array to a .npy file.
cp.save('data.npy', data)

# Load an array from a .npy file.
loaded_data = cp.load('data.npy')
print(f"Original shape: {data.shape}, Loaded shape: {loaded_data.shape}")

# Save multiple arrays to an uncompressed .npz file.
cp.savez('dataset.npz',
         features=data,
         labels=labels,
         metadata=metadata)

# Save with compression.
cp.savez_compressed('dataset_compressed.npz',
                    features=data,
                    labels=labels)

# Load from the .npz file.
npz_file = cp.load('dataset.npz')
loaded_features = npz_file['features']
loaded_labels = npz_file['labels']
npz_file.close()  # good practice to close the archive handle
# Example: text-based saving and loading with savetxt/loadtxt.
import cupy as cp

# Create sample data.
measurements = cp.random.normal(100, 15, (50, 3))
timestamps = cp.arange(50)

# Save to a text file with custom formatting.
cp.savetxt('measurements.txt',
           measurements,
           fmt='%.2f',
           delimiter=',',
           header='Temperature,Humidity,Pressure',
           comments='')

# Save with per-column formats.
combined_data = cp.column_stack([timestamps, measurements])
cp.savetxt('timestamped_data.csv',
           combined_data,
           fmt=['%d', '%.2f', '%.2f', '%.2f'],
           delimiter=',',
           header='Timestamp,Temperature,Humidity,Pressure',
           comments='')

# Load text data, skipping the header row.
loaded_measurements = cp.loadtxt('measurements.txt', delimiter=',', skiprows=1)
print(f"Loaded data shape: {loaded_measurements.shape}")

# Load selected columns only.
temp_humidity = cp.loadtxt('measurements.txt',
                           delimiter=',',
                           skiprows=1,
                           usecols=(0, 1))

# Load and unpack columns into separate arrays.
temp, humidity, pressure = cp.loadtxt('measurements.txt',
                                      delimiter=',',
                                      skiprows=1,
                                      unpack=True)
# Example: handling missing values with genfromtxt.
import cupy as cp

# Create data with missing values (simulated with NaN).
data_with_missing = cp.random.random((20, 4))
data_with_missing[5:8, 1] = cp.nan
data_with_missing[12:15, 2] = cp.nan

# Save the data; '%.6f' writes the NaNs as the text 'nan'.
cp.savetxt('data_with_missing.txt', data_with_missing, fmt='%.6f')

# Load with missing-value handling using genfromtxt.
loaded_with_missing = cp.genfromtxt('data_with_missing.txt',
                                    missing_values='nan',
                                    filling_values=-999.0)
print(f"Missing values filled with -999: {cp.sum(loaded_with_missing == -999.0)}")

# Load structured data with field names taken from the header row.
# NOTE(review): dtype=None with names=True yields a structured dtype -- confirm CuPy support.
structured_data = cp.genfromtxt('timestamped_data.csv',
                                delimiter=',',
                                names=True,
                                dtype=None,
                                encoding='utf-8')
# Example: interoperability between CuPy and NumPy binary files.
import cupy as cp
import numpy as np

# Create a CuPy array.
gpu_data = cp.random.random((500, 500))

# Save the CuPy array (the data is transferred to the CPU automatically).
cp.save('gpu_data.npy', gpu_data)

# Load into NumPy.
numpy_data = np.load('gpu_data.npy')
print(f"NumPy loaded data type: {type(numpy_data)}")

# Load back into CuPy.
cupy_data = cp.load('gpu_data.npy')
print(f"CuPy loaded data type: {type(cupy_data)}")

# Cross-platform compatibility: save from CuPy, load with NumPy.
cp.savez('cross_platform.npz',
         array1=cp.ones((100, 100)),
         array2=cp.zeros((50, 50)))

# Load with NumPy.
np_loaded = np.load('cross_platform.npz')
np_array1 = np_loaded['array1']
print(f"NumPy can load CuPy-saved data: {np_array1.shape}")

# Save with NumPy, load with CuPy.
np.save('numpy_saved.npy', np.random.random((200, 200)))
cp_loaded = cp.load('numpy_saved.npy')
print(f"CuPy can load NumPy-saved data: {cp_loaded.shape}")
# Example: chunked GPU processing of a memory-mapped file.
import cupy as cp
import numpy as np

# Create a large dataset with NumPy so the file can be memory mapped.
large_data = np.random.random((10000, 1000)).astype(np.float32)
np.save('large_dataset.npy', large_data)

# Memory map the file read-only.
# NOTE: cupy.load does not support mmap_mode, so NumPy does the mapping.
mmapped_data = np.load('large_dataset.npy', mmap_mode='r')

# Process fixed-size chunks on the GPU.
chunk_size = 1000
for i in range(0, len(mmapped_data), chunk_size):
    chunk = mmapped_data[i:i + chunk_size]
    # Transfer the chunk to the GPU.
    gpu_chunk = cp.asarray(chunk)
    # Process on the GPU.
    processed = cp.sqrt(gpu_chunk + 1.0)
    # Bring the result back to the host if needed.
    result = cp.asnumpy(processed)
    print(f"Processed chunk {i // chunk_size + 1}/{len(mmapped_data) // chunk_size}")
# Example: controlling array-to-string conversion.
import cupy as cp

# Create test arrays.
small_array = cp.array([[1.23456789, 2.34567890],
                        [3.45678901, 4.56789012]])
large_array = cp.random.random((100, 100))

# Control string representation.
print("Default representation:")
print(cp.array_str(small_array))
print("\nCustom precision:")
print(cp.array_str(small_array, precision=2))
print("\nCustom representation:")
print(cp.array_repr(small_array, precision=3, suppress_small=True))

# Full control with array2string.
custom_repr = cp.array2string(small_array,
                              precision=4,
                              separator=', ',
                              prefix='Array: ',
                              suffix=' [end]')
print(f"\nCustom format: {custom_repr}")

# Summarize large arrays. threshold/edgeitems are array2string options,
# not array_str options (see the array_str signature), so use array2string.
print("\nLarge array summary:")
print(cp.array2string(large_array, threshold=10, edgeitems=2))
# Example: comparing on-disk sizes of different save formats.
import cupy as cp
import os

# Create test data of different dtypes.
float_data = cp.random.random((1000, 1000)).astype(cp.float32)
int_data = cp.random.randint(0, 100, (1000, 1000), dtype=cp.int32)
bool_data = cp.random.random((1000, 1000)) > 0.5

# Save in each format, then compare resulting file sizes.
formats = {
    'uncompressed_npz': lambda: cp.savez('test_uncompressed.npz',
                                         f=float_data, i=int_data, b=bool_data),
    'compressed_npz': lambda: cp.savez_compressed('test_compressed.npz',
                                                  f=float_data, i=int_data, b=bool_data),
    'individual_npy': lambda: [cp.save(f'test_{t}.npy', d)
                               for t, d in [('float', float_data),
                                            ('int', int_data),
                                            ('bool', bool_data)]],
}

for format_name, save_func in formats.items():
    save_func()
    if format_name == 'individual_npy':
        total_size = sum(os.path.getsize(f'test_{t}.npy')
                         for t in ['float', 'int', 'bool'])
        print(f"{format_name}: {total_size / 1024 / 1024:.2f} MB")
    else:
        # 'uncompressed_npz' -> test_uncompressed.npz; 'compressed_npz' -> test_compressed.npz
        filename = f"test_{format_name.split('_')[0]}.npz"
        size = os.path.getsize(filename)
        print(f"{format_name}: {size / 1024 / 1024:.2f} MB")

# Install with Tessl CLI
#   npx tessl i tessl/pypi-cupy-cuda112