The Deep Learning framework to train, deploy, and ship AI products Lightning fast.
—
Data handling utilities including streaming datasets, combined data loaders, and data processing functions for efficient data pipeline management in large-scale training.
High-performance streaming datasets for large-scale data processing.
class StreamingDataset:
    """Streaming dataset that reads from a directory of streaming data.

    API stub: only the constructor signature is documented here; no
    implementation is shown.
    """

    def __init__(self, input_dir: str, **kwargs):
        """Initialize streaming dataset.

        Args:
            input_dir: Directory containing streaming data.
            **kwargs: Additional keyword options; not documented in this stub.
        """
class CombinedStreamingDataset:
    """Combination of several streaming datasets.

    API stub: only the constructor signature is documented here; how the
    datasets are combined is not shown.
    """

    def __init__(self, datasets: List[StreamingDataset], **kwargs):
        """Initialize combined streaming dataset.

        Args:
            datasets: List of streaming datasets to combine.
            **kwargs: Additional keyword options; not documented in this stub.
        """
class StreamingDataLoader:
    """Data loader for a streaming dataset.

    API stub: only the constructor signature is documented here; no
    implementation is shown.
    """

    def __init__(self, dataset: StreamingDataset, **kwargs):
        """Initialize streaming data loader.

        Args:
            dataset: Streaming dataset to load.
            **kwargs: Additional keyword options; not documented in this stub.
        """
# Aliases for convenience: alternative names bound to the same StreamingDataset class.
LightningDataset = StreamingDataset
LightningIterableDataset = StreamingDataset

Functions for optimizing and processing data for efficient loading.
def optimize(
    data_dir: str,
    output_dir: str,
    chunk_size: int = 1024 * 1024,
    **kwargs
) -> None:
    """Optimize data for streaming.

    API stub: only the signature is documented here; no implementation is
    shown.

    Args:
        data_dir: Input data directory.
        output_dir: Output directory for optimized data.
        chunk_size: Size of data chunks. Defaults to ``1024 * 1024``
            (1,048,576); the unit is not stated in this stub.
        **kwargs: Additional keyword options; not documented in this stub.
    """
def map(
    function: Callable,
    inputs: List[str],
    output_dir: str,
    **kwargs
) -> None:
    """Apply function to data inputs.

    API stub: only the signature is documented here; no implementation is
    shown. Note that this public name shadows the builtin ``map`` in any
    namespace that imports it unqualified.

    Args:
        function: Function to apply to data.
        inputs: List of input files/directories.
        output_dir: Output directory for processed data.
        **kwargs: Additional keyword options; not documented in this stub.
    """
def walk(data_dir: str, extensions: List[str] = None) -> List[str]:
    """Walk directory and find files with specified extensions.

    API stub: only the signature is documented here; no implementation is
    shown.

    Args:
        data_dir: Directory to walk.
        extensions: File extensions to include. Defaults to None; how None
            is treated is not shown here (presumably no filtering by
            extension; confirm against the implementation).

    Returns:
        List of found file paths.
    """


# Install with Tessl CLI
npx tessl i tessl/pypi-lightning