or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

callbacks.md collaborative-filtering.md core-training.md data-loading.md index.md interpretation.md medical.md metrics-losses.md tabular.md text.md vision.md

docs/vision.md

0

# Computer Vision

1

2

Complete computer vision toolkit for image classification, segmentation, object detection, and more. Built on top of the core fastai infrastructure with domain-specific optimizations.

3

4

## Capabilities

5

6

### Vision Learners

7

8

Main entry points for creating vision models with pre-trained architectures and domain-specific optimizations.

9

10

```python { .api }

11

def vision_learner(dls, arch, normalize=True, n_out=None, pretrained=True,

12

cut=None, splitter=None, y_range=None, config=None,

13

loss_func=None, opt_func=Adam, lr=defaults.lr, metrics=None,

14

**kwargs):

15

"""

16

Create a computer vision learner.

17

18

Parameters:

19

- dls: DataLoaders with image data

20

- arch: Model architecture (resnet34, efficientnet_b0, etc.)

21

- normalize: Apply ImageNet normalization

22

- n_out: Number of outputs (auto-detected from data if None)

23

- pretrained: Use pre-trained weights

24

- cut: Where to cut the pre-trained model

25

- splitter: Function to split model for differential learning rates

26

- y_range: Range of target values for regression

27

- config: Model configuration

28

- loss_func: Loss function (auto-selected if None)

29

- opt_func: Optimizer constructor

30

- lr: Learning rate

31

- metrics: Metrics to track

32

33

Returns:

34

- Learner instance configured for vision tasks

35

"""

36

37

def cnn_learner(dls, arch, **kwargs):

38

"""Deprecated alias for vision_learner."""

39

40

def unet_learner(dls, arch, normalize=True, n_out=None, img_size=None,

41

pretrained=True, cut=None, splitter=None, y_range=None,

42

config=None, loss_func=None, opt_func=Adam, lr=defaults.lr,

43

metrics=None, **kwargs):

44

"""

45

Create a U-Net learner for segmentation.

46

47

Parameters:

48

- dls: DataLoaders with image and mask data

49

- arch: Encoder architecture (resnet34, etc.)

50

- normalize: Apply ImageNet normalization

51

- n_out: Number of output classes

52

- img_size: Input image size

53

- pretrained: Use pre-trained encoder

54

- cut: Where to cut the encoder

55

- splitter: Function to split model layers

56

- y_range: Range for regression outputs

57

- config: Model configuration

58

- loss_func: Loss function (typically CrossEntropyLoss)

59

- opt_func: Optimizer constructor

60

- lr: Learning rate

61

- metrics: Metrics to track (Dice, IoU, etc.)

62

63

Returns:

64

- Learner instance for segmentation

65

"""

66

67

def create_vision_model(arch, n_out=1000, pretrained=True, cut=None, **kwargs):

68

"""Create vision model without learner wrapper."""

69

70

def create_unet_model(arch, n_out, img_size=None, pretrained=True, cut=None, **kwargs):

71

"""Create U-Net model without learner wrapper."""

72

```

73

74

### Image Data Loaders

75

76

Specialized DataLoaders for common computer vision tasks.

77

78

```python { .api }

79

class ImageDataLoaders(DataLoaders):

80

"""DataLoaders for image datasets."""

81

82

@classmethod

83

def from_folder(cls, path, train='train', valid='valid', valid_pct=None,

84

seed=None, vocab=None, item_tfms=None, batch_tfms=None,

85

img_cls=PILImage, **kwargs):

86

"""

87

Create ImageDataLoaders from folder structure.

88

89

Parameters:

90

- path: Path to data directory

91

- train: Training folder name

92

- valid: Validation folder name

93

- valid_pct: Validation percentage (if no valid folder)

94

- seed: Random seed for splitting

95

- vocab: Category vocabulary

96

- item_tfms: Item-level transforms

97

- batch_tfms: Batch-level transforms

98

- img_cls: Image class to use

99

100

Returns:

101

- ImageDataLoaders instance

102

"""

103

104

@classmethod

105

def from_name_func(cls, path, fnames, label_func, valid_pct=0.2, seed=None,

106

item_tfms=None, batch_tfms=None, **kwargs):

107

"""

108

Create ImageDataLoaders using filename labeling function.

109

110

Parameters:

111

- path: Path to images

112

- fnames: List of filenames

113

- label_func: Function to extract labels from filenames

114

- valid_pct: Validation percentage

115

- seed: Random seed

116

- item_tfms: Item transforms

117

- batch_tfms: Batch transforms

118

119

Returns:

120

- ImageDataLoaders instance

121

"""

122

123

@classmethod

124

def from_name_re(cls, path, fnames, pat, valid_pct=0.2, **kwargs):

125

"""Create using regex pattern for labels."""

126

127

@classmethod

128

def from_path_func(cls, path, fnames, label_func, valid_pct=0.2, **kwargs):

129

"""Create using path-based labeling function."""

130

131

@classmethod

132

def from_path_re(cls, path, fnames, pat, valid_pct=0.2, **kwargs):

133

"""Create using regex pattern on full paths."""

134

135

@classmethod

136

def from_lists(cls, path, fnames, labels, valid_pct=0.2, **kwargs):

137

"""Create from filename and label lists."""

138

139

@classmethod

140

def from_csv(cls, path, csv_fname, header='infer', delimiter=None, **kwargs):

141

"""Create from CSV file."""

142

143

@classmethod

144

def from_df(cls, df, path='.', valid_pct=0.2, **kwargs):

145

"""Create from pandas DataFrame."""

146

```

147

148

### Image Classes

149

150

Core image classes for handling different image types.

151

152

```python { .api }

153

class PILImage(PILBase):

154

"""PIL Image wrapper with fastai functionality."""

155

156

@classmethod

157

def create(cls, fn:(Path,str,Tensor,ndarray,bytes), **kwargs):

158

"""Create PILImage from various sources."""

159

160

def show(self, ctx=None, figsize=None, title=None, **kwargs):

161

"""Display the image."""

162

163

def to_thumb(self, h, w=None):

164

"""Create thumbnail."""

165

166

@property

167

def shape(self):

168

"""Image shape as (height, width)."""

169

170

class PILImageBW(PILImage):

171

"""PIL Image wrapper for grayscale images."""

172

173

_show_args = {'cmap': 'gray'}

174

175

class PILMask(PILImage):

176

"""PIL Image wrapper for segmentation masks."""

177

178

_show_args = {'cmap': 'tab20', 'alpha': 0.5, 'vmin': 0, 'vmax': 20}

179

180

def show(self, ctx=None, figsize=None, title=None, **kwargs):

181

"""Display mask with color mapping."""

182

```

183

184

### Data Augmentation

185

186

Comprehensive augmentation pipeline for robust model training.

187

188

```python { .api }

189

def aug_transforms(mult=1.0, do_flip=True, flip_vert=False, max_rotate=10.0,

190

min_zoom=1.0, max_zoom=1.1, max_lighting=0.2, max_warp=0.2,

191

p_affine=0.75, p_lighting=0.75, xtra_tfms=None, size=None,

192

mode='bilinear', pad_mode='reflection', align_corners=True,

193

batch=False, min_scale=1.0):

194

"""

195

Standard set of augmentation transforms.

196

197

Parameters:

198

- mult: Multiplier for augmentation strength

199

- do_flip: Enable horizontal flips

200

- flip_vert: Enable vertical flips

201

- max_rotate: Maximum rotation degrees

202

- min_zoom: Minimum zoom factor

203

- max_zoom: Maximum zoom factor

204

- max_lighting: Maximum lighting change

205

- max_warp: Maximum perspective warp

206

- p_affine: Probability of affine transforms

207

- p_lighting: Probability of lighting transforms

208

- xtra_tfms: Additional transforms

209

- size: Target size for transforms

210

- mode: Interpolation mode

211

- pad_mode: Padding mode

212

- align_corners: Align corners in resize

213

- batch: Apply at batch level

214

- min_scale: Minimum scale factor

215

216

Returns:

217

- List of transform objects

218

"""

219

220

class Resize(RandTransform):

221

"""Resize images to specified size."""

222

223

def __init__(self, size, method=ResizeMethod.Crop, pad_mode=PadMode.Reflection,

224

resamples=(Image.BILINEAR, Image.NEAREST), **kwargs): ...

225

226

class RandomResizedCrop(RandTransform):

227

"""Random crop with resize (like ImageNet training)."""

228

229

def __init__(self, size, min_scale=0.08, ratio=(3/4, 4/3), resamples=(Image.BILINEAR, Image.NEAREST),

230

val_xtra=0.14, **kwargs): ...

231

232

class CropPad(Transform):

233

"""Crop or pad to specified size."""

234

235

def __init__(self, size, pad_mode=PadMode.Reflection, **kwargs): ...

236

237

class FlipItem(RandTransform):

238

"""Random horizontal flip."""

239

240

def __init__(self, p=0.5): ...

241

242

class DihedralItem(RandTransform):

243

"""Random 90-degree rotations and flips."""

244

245

def __init__(self, p=0.5): ...

246

247

class Brightness(RandTransform):

248

"""Random brightness adjustment."""

249

250

def __init__(self, max_lighting=0.2, p=0.75, draw=None, batch=False): ...

251

252

class Contrast(RandTransform):

253

"""Random contrast adjustment."""

254

255

def __init__(self, max_lighting=0.2, p=0.75, draw=None, batch=False): ...

256

257

class Saturation(RandTransform):

258

"""Random saturation adjustment."""

259

260

def __init__(self, max_lighting=0.2, p=0.75, draw=None, batch=False): ...

261

262

class Hue(RandTransform):

263

"""Random hue shift."""

264

265

def __init__(self, max_hue=0.1, p=0.75, draw=None, batch=False): ...

266

267

class Cutout(RandTransform):

268

"""Random rectangular occlusion."""

269

270

def __init__(self, n_holes=1, length=40, p=0.5): ...

271

272

class RandomErasing(RandTransform):

273

"""Random erasing augmentation."""

274

275

def __init__(self, p=0.5, sh=0.4, min_aspect=0.3, max_count=1): ...

276

```

277

278

### Pre-trained Model Architectures

279

280

Access to various pre-trained model architectures optimized for different tasks.

281

282

```python { .api }

283

def xresnet18(pretrained=False, **kwargs):

284

"""XResNet-18 architecture."""

285

286

def xresnet34(pretrained=False, **kwargs):

287

"""XResNet-34 architecture."""

288

289

def xresnet50(pretrained=False, **kwargs):

290

"""XResNet-50 architecture."""

291

292

def xresnet101(pretrained=False, **kwargs):

293

"""XResNet-101 architecture."""

294

295

def xresnet152(pretrained=False, **kwargs):

296

"""XResNet-152 architecture."""

297

298

class XResNet(nn.Sequential):

299

"""Configurable XResNet architecture."""

300

301

def __init__(self, block, layers, num_classes=1000, zero_init_residual=False,

302

groups=1, width_per_group=64, replace_stride_with_dilation=None,

303

norm_layer=None, act_cls=defaults.activation, **kwargs): ...

304

305

class DynamicUnet(SequentialEx):

306

"""Dynamic U-Net for segmentation."""

307

308

def __init__(self, encoder, n_out, img_size, blur=False, blur_final=True,

309

self_attention=False, y_range=None, last_cross=True,

310

bottle=False, **kwargs): ...

311

312

class TimmBody(nn.Module):

313

"""Body using timm models."""

314

315

def __init__(self, arch, pretrained=True, cut=None, n_in=3): ...

316

```

317

318

### Vision Utilities

319

320

Utility functions for computer vision tasks.

321

322

```python { .api }

323

# Normalization statistics for common datasets

324

imagenet_stats = ([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

325

cifar_stats = ([0.491, 0.482, 0.447], [0.247, 0.243, 0.261])

326

mnist_stats = ([0.131], [0.308])

327

328

def download_images(dest, urls, max_pics=1000, n_workers=8, timeout=4):

329

"""

330

Download images from URLs.

331

332

Parameters:

333

- dest: Destination directory

334

- urls: List of image URLs

335

- max_pics: Maximum images to download

336

- n_workers: Number of worker threads

337

- timeout: Download timeout

338

"""

339

340

def verify_images(fns):

341

"""

342

Verify that image files are valid.

343

344

Parameters:

345

- fns: List of image filenames

346

347

Returns:

348

- List of failed filenames

349

"""

350

351

def show_image(im, ax=None, figsize=None, title=None, ctx=None, **kwargs):

352

"""Display single image."""

353

354

def show_images(ims, nrows=1, ncols=None, titles=None, figsize=None, **kwargs):

355

"""Display multiple images in grid."""

356

357

def subplots(nrows=1, ncols=1, figsize=None, imsize=3, add_vert=0, **kwargs):

358

"""Create matplotlib subplots with fastai styling."""

359

```

360

361

### Segmentation Support

362

363

Specialized functionality for image segmentation tasks.

364

365

```python { .api }

366

class SegmentationDataLoaders(DataLoaders):

367

"""DataLoaders for segmentation tasks."""

368

369

@classmethod

370

def from_label_func(cls, path, fnames, label_func, valid_pct=0.2, **kwargs):

371

"""Create from labeling function that returns mask paths."""

372

373

class MaskBlock(TransformBlock):

374

"""Transform block for segmentation masks."""

375

376

def __init__(self, codes=None): ...

377

378

def DiceLoss():

379

"""Dice loss for segmentation."""

380

381

def JaccardLoss():

382

"""Jaccard (IoU) loss for segmentation."""

383

384

def FocalLoss(alpha=1, gamma=2):

385

"""Focal loss for handling class imbalance."""

386

```