# fastai simplifies training fast and accurate neural nets using modern best practices.
#
# Comprehensive data loading system built around the DataBlock API and transform
# pipelines. Provides flexible, composable data processing for all fastai domains.

# Main class for managing training and validation data with integrated transforms.
class DataLoaders:
    """
    Container for train/valid DataLoader pairs.

    Parameters:
    - *loaders: DataLoader instances (typically train, valid)
    - path: Base path for saving/loading
    - device: Device to place data on
    """
    def __init__(self, *loaders, path='.', device=None): ...

    @classmethod
    def from_dblock(cls, dblock, source, path='.', **kwargs):
        """
        Create DataLoaders from a DataBlock.

        Parameters:
        - dblock: DataBlock defining data processing
        - source: Data source (path, list, etc.)
        - path: Base path
        - **kwargs: Additional arguments

        Returns:
        - DataLoaders instance
        """

    def show_batch(self, b=None, max_n=9, ctxs=None, show=True, **kwargs):
        """Display a batch of data."""

    @property
    def train(self):
        """Training DataLoader."""

    @property
    def valid(self):
        """Validation DataLoader."""

    def one_batch(self):
        """Get one batch from training data."""

    def save(self, file='data_loaders.pkl'):
        """Save DataLoaders to disk."""

    @classmethod
    def load(cls, path, file='data_loaders.pkl'):
        """Load DataLoaders from disk."""

# Flexible API for constructing data processing pipelines from modular components.
class DataBlock:
    """
    Flexible data processing pipeline constructor.

    Parameters:
    - blocks: Transform blocks for inputs and targets
    - dl_type: DataLoader type to use
    - getters: Functions to extract data from source
    - n_inp: Number of input elements
    - item_tfms: Item-level transforms
    - batch_tfms: Batch-level transforms
    - **kwargs: Additional DataLoader arguments
    """
    # NOTE(review): the default `blocks=(TransformBlock,)` requires TransformBlock
    # to be defined before this class executes — confirm ordering in the real module.
    def __init__(self, blocks=(TransformBlock,), dl_type=None, getters=None,
                 n_inp=None, item_tfms=None, batch_tfms=None, **kwargs): ...

    def dataloaders(self, source, path='.', verbose=False, **kwargs):
        """
        Create DataLoaders from a data source.

        Parameters:
        - source: Data source
        - path: Base path
        - verbose: Show processing information
        - **kwargs: DataLoader arguments

        Returns:
        - DataLoaders instance
        """

    def datasets(self, source, verbose=False, **kwargs):
        """Create datasets without DataLoaders."""

    def summary(self, source, **kwargs):
        """Show summary of data processing pipeline."""

# Building blocks for different data types in the DataBlock API.
class TransformBlock:
    """A modular block bundling the transforms for one data type.

    Accepts type-, item-, and batch-level transforms together with
    DataLoader configuration (`dl_type`, `dls_kwargs`).
    """
    def __init__(self, type_tfms=None, item_tfms=None, batch_tfms=None,
                 dl_type=None, dls_kwargs=None): ...
class ImageBlock(TransformBlock):
    """Transform block for image data."""
    # `cls` defaults to the project's PILImage image type.
    def __init__(self, cls=PILImage): ...

class CategoryBlock(TransformBlock):
    """Transform block for categorical labels."""
    def __init__(self, vocab=None, sort=True, add_na=False): ...

class MultiCategoryBlock(TransformBlock):
    """Transform block for multi-label categorical data."""
    def __init__(self, encoded=False, vocab=None, add_na=False): ...

class RegressionBlock(TransformBlock):
    """Transform block for regression targets."""

class MaskBlock(TransformBlock):
    """Transform block for segmentation masks."""
    def __init__(self, codes=None): ...

class PointBlock(TransformBlock):
    """Transform block for point/keypoint data."""

class BBoxBlock(TransformBlock):
    """Transform block for bounding boxes."""

class BBoxLblBlock(TransformBlock):
    """Transform block for labeled bounding boxes."""

# Functions and classes for splitting data into train/validation sets.
class RandomSplitter:
    """Randomly partition items into train and validation subsets.

    Parameters:
    - valid_pct: Fraction of items reserved for validation
    - seed: Optional seed for reproducible shuffling
    """
    def __init__(self, valid_pct=0.2, seed=None): ...

    def __call__(self, o):
        """Split the items *o* at random.

        Parameters:
        - o: Data items to split

        Returns:
        - Train indices, validation indices
        """
class TrainTestSplitter:
    """Splitter driven by a predefined test set."""
    def __init__(self, test_name='test', valid_name='valid'): ...
def RandomSubsetSplitter(valid_pct=0.2, n=None, **kwargs):
    """Random subset splitter for large datasets."""

def FuncSplitter(func):
    """Split based on function result."""

def MaskSplitter(mask):
    """Split based on boolean mask."""

def FileSplitter(fname):
    """Split based on filenames in text file."""

def GrandparentSplitter(train_name='train', valid_name='valid'):
    """Split based on grandparent folder names."""

def IndexSplitter(valid_idx):
    """Split based on specific indices."""

# Utilities for working with files and external datasets.
def get_files(path, extensions=None, recurse=True, folders=None, followlinks=True):
    """Collect files beneath *path* with optional filtering.

    Parameters:
    - path: Directory to search
    - extensions: File extensions to keep
    - recurse: Whether to descend into subdirectories
    - folders: Folder names to include/exclude
    - followlinks: Whether to follow symbolic links

    Returns:
    - List of Path objects
    """
def get_image_files(path, recurse=True, folders=None):
    """Return the image files found under *path*."""

def get_text_files(path, recurse=True, folders=None):
    """Return the text files found under *path*."""
def untar_data(url, dest=None, c_key='data', force_download=False, extract=True):
    """Download a fastai dataset archive and unpack it.

    Parameters:
    - url: Dataset URL or URLs enum value
    - dest: Destination directory
    - c_key: Config key used to resolve the base path
    - force_download: Re-download even if the archive exists
    - extract: Unpack the archive after downloading

    Returns:
    - Path to extracted data
    """
class URLs:
    """Registry of predefined dataset download URLs."""
    PETS = 'https://s3.amazonaws.com/fast-ai-imageclas/oxford-iiit-pet.tgz'
    MNIST = 'https://s3.amazonaws.com/fast-ai-sample/mnist_png.tgz'
    CIFAR = 'https://s3.amazonaws.com/fast-ai-sample/cifar10.tgz'
    IMDB = 'https://s3.amazonaws.com/fast-ai-nlp/imdb.tgz'
    # ... many more dataset URLs
def download_url(url, dest=None, timeout=None, show_progress=True):
    """Download file from URL."""

def fastai_path():
    """Get fastai data directory path."""

# Core transform classes for data preprocessing.
class Transform:
    """Root class from which all transforms derive.

    Accepts optional encode/decode callables plus split and ordering
    configuration (`split_idx`, `order`).
    """
    def __init__(self, enc=None, dec=None, split_idx=None, order=None): ...

    def __call__(self, x, **kwargs): ...
class ToTensor(Transform):
    """Convert to tensor."""

class IntToFloatTensor(Transform):
    """Convert integer tensor to float."""

class Normalize(Transform):
    """Normalize with mean and standard deviation."""
    def __init__(self, mean=None, std=None, axes=None): ...

class CategoryMap(Transform):
    """Map categories to integers."""
    def __init__(self, vocab=None, add_na=False, sort=True): ...

class MultiCategoryMap(Transform):
    """Map multi-categories to multi-hot encoding."""
    def __init__(self, vocab=None, add_na=False, c2i=None): ...

class Resize(Transform):
    """Resize images to specified size."""
    def __init__(self, size, method='crop', pad_mode='reflection'): ...

# Advanced data containers with integrated transforms.
class TfmdLists:
    """An item list whose elements pass through a transform pipeline.

    Takes the raw items and transforms, plus setup/split configuration.
    """
    def __init__(self, items, tfms, use_list=None, do_setup=True, split_idx=None,
                 train_setup=True, splits=None, types=None, verbose=False): ...

    def subset(self, i):
        """Return the subset selected by index *i*."""

    def new_empty(self):
        """Return a fresh, empty instance."""
class Datasets:
    """Multiple TfmdLists that create tuples."""
    def __init__(self, items, tfms=None, tls=None, n_inp=None, dl_type=None, **kwargs): ...

    def subset(self, i):
        """Get subset by split index."""

    @property
    def train(self):
        """Training dataset."""

    @property
    def valid(self):
        """Validation dataset."""

# Install with Tessl CLI
# npx tessl i tessl/pypi-fastai