or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

datasets.md · export.md · hub.md · index.md · metrics.md · models.md · pipelines.md · preprocessors.md · training.md · utilities.md

docs/preprocessors.md

0

# Preprocessors

1

2

ModelScope's preprocessor framework provides data preprocessing components for different modalities and tasks. Preprocessors ensure consistent data preparation and format conversion for model inputs.

3

4

## Capabilities

5

6

### Base Preprocessor Class

7

8

Abstract base class for all preprocessors providing common interface.

9

10

```python { .api }

11

class Preprocessor:

12

"""

13

Base preprocessor class for data preprocessing.

14

"""

15

16

def __init__(self, **kwargs):

17

"""

18

Initialize preprocessor with configuration parameters.

19

20

Parameters:

21

- **kwargs: Preprocessor-specific configuration options

22

"""

23

24

def __call__(self, data):

25

"""

26

Process input data.

27

28

Parameters:

29

- data: Input data to preprocess

30

31

Returns:

32

Preprocessed data ready for model input

33

"""

34

35

def forward(self, data):

36

"""

37

Forward pass through preprocessor.

38

39

Parameters:

40

- data: Input data

41

42

Returns:

43

Processed data

44

"""

45

```

46

47

### Preprocessor Builder

48

49

Factory function for creating preprocessors from configuration.

50

51

```python { .api }

52

def build_preprocessor(cfg: dict, default_args: dict = None):

53

"""

54

Build preprocessor from configuration dictionary.

55

56

Parameters:

57

- cfg: Preprocessor configuration dictionary

58

- default_args: Default arguments to merge

59

60

Returns:

61

Preprocessor instance

62

"""

63

64

# Preprocessor registry

65

PREPROCESSORS: dict # Registry of available preprocessor types

66

```

67

68

## Common Preprocessors

69

70

### Composition and Chaining

71

72

```python { .api }

73

class Compose(Preprocessor):

74

"""

75

Chain multiple preprocessors together.

76

"""

77

78

def __init__(self, transforms: list, **kwargs):

79

"""

80

Initialize composition preprocessor.

81

82

Parameters:

83

- transforms: List of preprocessor instances to chain

84

"""

85

86

class Filter(Preprocessor):

87

"""

88

Data filtering preprocessor.

89

"""

90

91

def __init__(self, filter_fn, **kwargs):

92

"""

93

Initialize filter preprocessor.

94

95

Parameters:

96

- filter_fn: Function to determine which data to keep

97

"""

98

```

99

100

### Tensor Conversion

101

102

```python { .api }

103

class ToTensor(Preprocessor):

104

"""

105

Convert data to tensor format.

106

"""

107

108

def __init__(self, **kwargs):

109

"""Initialize tensor conversion preprocessor."""

110

```

111

112

## Image Preprocessors

113

114

### Image Loading and Basic Operations

115

116

```python { .api }

117

class LoadImage(Preprocessor):

118

"""

119

Image loading utility preprocessor.

120

"""

121

122

def __init__(self, color_type: str = 'color', **kwargs):

123

"""

124

Initialize image loader.

125

126

Parameters:

127

- color_type: Image color format ('color', 'grayscale')

128

"""

129

130

def load_image(path: str, color_type: str = 'color') -> Image:

131

"""

132

Load image from file path.

133

134

Parameters:

135

- path: Path to image file

136

- color_type: Color format for loading

137

138

Returns:

139

Loaded image object

140

"""

141

```

142

143

### Image Enhancement Preprocessors

144

145

```python { .api }

146

class ImageColorEnhanceFinetunePreprocessor(Preprocessor):

147

"""

148

Preprocessor for image color enhancement fine-tuning tasks.

149

"""

150

151

def __init__(self, **kwargs):

152

"""Initialize color enhancement preprocessor."""

153

154

class ImageDenoisePreprocessor(Preprocessor):

155

"""

156

Preprocessor for image denoising tasks.

157

"""

158

159

def __init__(self, **kwargs):

160

"""Initialize denoising preprocessor."""

161

162

class ImageDeblurPreprocessor(Preprocessor):

163

"""

164

Preprocessor for image deblurring tasks.

165

"""

166

167

def __init__(self, **kwargs):

168

"""Initialize deblurring preprocessor."""

169

170

class ImageRestorationPreprocessor(Preprocessor):

171

"""

172

General image restoration preprocessor.

173

"""

174

175

def __init__(self, **kwargs):

176

"""Initialize restoration preprocessor."""

177

```

178

179

### Computer Vision Preprocessors

180

181

```python { .api }

182

class ImageClassificationMmcvPreprocessor(Preprocessor):

183

"""

184

MMCV-based preprocessor for image classification.

185

"""

186

187

def __init__(self, **kwargs):

188

"""Initialize MMCV image classification preprocessor."""

189

190

class ImageInstanceSegmentationPreprocessor(Preprocessor):

191

"""

192

Preprocessor for instance segmentation tasks.

193

"""

194

195

def __init__(self, **kwargs):

196

"""Initialize instance segmentation preprocessor."""

197

198

class ControllableImageGenerationPreprocessor(Preprocessor):

199

"""

200

Preprocessor for controllable image generation tasks.

201

"""

202

203

def __init__(self, **kwargs):

204

"""Initialize controllable generation preprocessor."""

205

```

206

207

## Natural Language Processing Preprocessors

208

209

### Transformer-based Preprocessors

210

211

```python { .api }

212

class TextClassificationTransformersPreprocessor(Preprocessor):

213

"""

214

Transformer-based preprocessor for text classification.

215

"""

216

217

def __init__(self, model_dir: str, **kwargs):

218

"""

219

Initialize text classification preprocessor.

220

221

Parameters:

222

- model_dir: Directory containing tokenizer and model files

223

"""

224

225

class TokenClassificationTransformersPreprocessor(Preprocessor):

226

"""

227

Transformer-based preprocessor for token classification (NER, POS tagging).

228

"""

229

230

def __init__(self, model_dir: str, **kwargs):

231

"""

232

Initialize token classification preprocessor.

233

234

Parameters:

235

- model_dir: Directory containing model and tokenizer

236

"""

237

238

class TextGenerationTransformersPreprocessor(Preprocessor):

239

"""

240

Preprocessor for text generation tasks.

241

"""

242

243

def __init__(self, model_dir: str, **kwargs):

244

"""

245

Initialize text generation preprocessor.

246

247

Parameters:

248

- model_dir: Model directory path

249

"""

250

251

class SentenceEmbeddingTransformersPreprocessor(Preprocessor):

252

"""

253

Preprocessor for sentence embedding tasks.

254

"""

255

256

def __init__(self, model_dir: str, **kwargs):

257

"""

258

Initialize sentence embedding preprocessor.

259

260

Parameters:

261

- model_dir: Model directory path

262

"""

263

264

class FillMaskTransformersPreprocessor(Preprocessor):

265

"""

266

Preprocessor for fill-mask (masked language modeling) tasks.

267

"""

268

269

def __init__(self, model_dir: str, **kwargs):

270

"""

271

Initialize fill-mask preprocessor.

272

273

Parameters:

274

- model_dir: Model directory path

275

"""

276

```

277

278

### Basic Text Preprocessors

279

280

```python { .api }

281

class Tokenize(Preprocessor):

282

"""

283

Basic tokenization preprocessor.

284

"""

285

286

def __init__(self, tokenizer_type: str = 'basic', **kwargs):

287

"""

288

Initialize tokenizer.

289

290

Parameters:

291

- tokenizer_type: Type of tokenizer to use

292

"""

293

294

class SentencePiecePreprocessor(Preprocessor):

295

"""

296

SentencePiece tokenization preprocessor.

297

"""

298

299

def __init__(self, model_file: str, **kwargs):

300

"""

301

Initialize SentencePiece preprocessor.

302

303

Parameters:

304

- model_file: Path to SentencePiece model file

305

"""

306

```

307

308

## Audio Preprocessors

309

310

### Audio Processing and Feature Extraction

311

312

```python { .api }

313

class LinearAECAndFbank(Preprocessor):

314

"""

315

Linear Acoustic Echo Cancellation and Filter Bank feature extraction.

316

"""

317

318

def __init__(self, **kwargs):

319

"""Initialize AEC and filter bank preprocessor."""

320

321

class AudioBrainPreprocessor(Preprocessor):

322

"""

323

AudioBrain-based preprocessing for speech tasks.

324

"""

325

326

def __init__(self, **kwargs):

327

"""Initialize AudioBrain preprocessor."""

328

```

329

330

### Audio Format Conversion

331

332

```python { .api }

333

class WavToScp(Preprocessor):

334

"""

335

Convert WAV files to SCP (Kaldi script) format.

336

"""

337

338

def __init__(self, **kwargs):

339

"""Initialize WAV to SCP converter."""

340

341

class WavToLists(Preprocessor):

342

"""

343

Convert WAV files to list format for batch processing.

344

"""

345

346

def __init__(self, **kwargs):

347

"""Initialize WAV to lists converter."""

348

349

class KanttsDataPreprocessor(Preprocessor):

350

"""

351

Preprocessor for Kantts text-to-speech data preparation.

352

"""

353

354

def __init__(self, **kwargs):

355

"""Initialize Kantts data preprocessor."""

356

```

357

358

## Multi-Modal Preprocessors

359

360

### Vision-Language Preprocessors

361

362

```python { .api }

363

class OfaPreprocessor(Preprocessor):

364

"""

365

Preprocessor for OFA (One For All) multi-modal model.

366

"""

367

368

def __init__(self, **kwargs):

369

"""Initialize OFA preprocessor."""

370

371

class MPlugPreprocessor(Preprocessor):

372

"""

373

Preprocessor for MPlug multi-modal model.

374

"""

375

376

def __init__(self, **kwargs):

377

"""Initialize MPlug preprocessor."""

378

379

class HiTeAPreprocessor(Preprocessor):

380

"""

381

Preprocessor for HiTeA (Hierarchical Temporal-Aware video-language) model.

382

"""

383

384

def __init__(self, **kwargs):

385

"""Initialize HiTeA preprocessor."""

386

387

class MplugOwlPreprocessor(Preprocessor):

388

"""

389

Preprocessor for MplugOwl multi-modal model.

390

"""

391

392

def __init__(self, **kwargs):

393

"""Initialize MplugOwl preprocessor."""

394

```

395

396

### Image Generation and Captioning

397

398

```python { .api }

399

class DiffusionImageGenerationPreprocessor(Preprocessor):

400

"""

401

Preprocessor for diffusion-based image generation.

402

"""

403

404

def __init__(self, **kwargs):

405

"""Initialize diffusion generation preprocessor."""

406

407

class ImageCaptioningClipInterrogatorPreprocessor(Preprocessor):

408

"""

409

CLIP-based preprocessor for image captioning tasks.

410

"""

411

412

def __init__(self, **kwargs):

413

"""Initialize CLIP interrogator preprocessor."""

414

```

415

416

## Video Preprocessors

417

418

```python { .api }

419

class ReadVideoData(Preprocessor):

420

"""

421

Video data reading and preprocessing.

422

"""

423

424

def __init__(self, **kwargs):

425

"""Initialize video data reader."""

426

427

class MovieSceneSegmentationPreprocessor(Preprocessor):

428

"""

429

Preprocessor for movie scene segmentation tasks.

430

"""

431

432

def __init__(self, **kwargs):

433

"""Initialize scene segmentation preprocessor."""

434

```

435

436

## Usage Examples

437

438

### Basic Preprocessor Usage

439

440

```python

441

from modelscope import Preprocessor, LoadImage, ToTensor, Compose

442

443

# Single preprocessor

444

image_loader = LoadImage(color_type='color')

445

image = image_loader('path/to/image.jpg')

446

447

# Compose multiple preprocessors

448

pipeline = Compose([

449

LoadImage(color_type='color'),

450

ToTensor()

451

])

452

453

processed_image = pipeline('path/to/image.jpg')

454

```

455

456

### Text Preprocessing

457

458

```python

459

from modelscope import TextClassificationTransformersPreprocessor

460

461

# Initialize text preprocessor

462

preprocessor = TextClassificationTransformersPreprocessor(

463

model_dir='damo/nlp_structbert_sentence-similarity_chinese'

464

)

465

466

# Process text data

467

text = "这是一个文本分类的例子"

468

processed = preprocessor(text)

469

print(f"Processed text: {processed}")

470

471

# Batch processing

472

texts = ["文本1", "文本2", "文本3"]

473

batch_processed = preprocessor(texts)

474

```

475

476

### Custom Preprocessor Implementation

477

478

```python

479

from modelscope import Preprocessor

480

481

class CustomTextPreprocessor(Preprocessor):

482

def __init__(self, max_length=512, **kwargs):

483

super().__init__(**kwargs)

484

self.max_length = max_length

485

486

def __call__(self, data):

487

# Custom preprocessing logic

488

if isinstance(data, str):

489

# Truncate text

490

data = data[:self.max_length]

491

# Add special tokens

492

data = f"[CLS] {data} [SEP]"

493

return data

494

495

# Use custom preprocessor

496

custom_prep = CustomTextPreprocessor(max_length=256)

497

result = custom_prep("这是一个很长的文本示例...")

498

```

499

500

### Image Preprocessing Pipeline

501

502

```python

503

from modelscope import LoadImage, ImageClassificationMmcvPreprocessor

504

505

# Load and preprocess image for classification

506

image_path = 'path/to/image.jpg'

507

508

# Method 1: Direct loading

509

image = LoadImage(color_type='color')(image_path)

510

511

# Method 2: Classification-specific preprocessing

512

classifier_prep = ImageClassificationMmcvPreprocessor()

513

processed_image = classifier_prep(image_path)

514

```

515

516

### Audio Preprocessing

517

518

```python

519

from modelscope import LinearAECAndFbank, WavToScp

520

521

# Audio feature extraction

522

audio_preprocessor = LinearAECAndFbank()

523

features = audio_preprocessor('path/to/audio.wav')

524

525

# Convert to SCP format

526

wav_converter = WavToScp()

527

scp_data = wav_converter('path/to/audio.wav')

528

```

529

530

### Multi-Modal Preprocessing

531

532

```python

533

from modelscope import OfaPreprocessor

534

535

# Multi-modal preprocessing for OFA model

536

ofa_prep = OfaPreprocessor()

537

538

# Process image-text pair

539

result = ofa_prep({

540

'image': 'path/to/image.jpg',

541

'text': '描述这张图片'

542

})

543

```

544

545

### Preprocessor Configuration

546

547

```python

548

from modelscope import build_preprocessor

549

550

# Build preprocessor from configuration

551

prep_config = {

552

'type': 'TextClassificationTransformersPreprocessor',

553

'model_dir': 'damo/nlp_structbert_base_chinese',

554

'max_length': 512,

555

'padding': True,

556

'truncation': True

557

}

558

559

preprocessor = build_preprocessor(prep_config)

560

result = preprocessor("输入文本")

561

```

562

563

### Chaining Preprocessors for Complex Workflows

564

565

```python

566

from modelscope import Compose, LoadImage, ToTensor

567

568

# Create preprocessing pipeline

569

image_pipeline = Compose([

570

LoadImage(color_type='color'),

571

# Custom resize function could be added here

572

ToTensor()

573

])

574

575

# Process single image

576

processed = image_pipeline('image.jpg')

577

578

# Process batch of images

579

image_paths = ['img1.jpg', 'img2.jpg', 'img3.jpg']

580

batch_processed = [image_pipeline(path) for path in image_paths]

581

```