ModelScope brings the notion of Model-as-a-Service to life with unified interfaces for state-of-the-art machine learning models.
—
ModelScope's preprocessor framework provides data preprocessing components for different modalities and tasks. Preprocessors ensure consistent data preparation and format conversion for model inputs.
Abstract base class for all preprocessors providing common interface.
class Preprocessor:
"""
Base preprocessor class for data preprocessing.
"""
def __init__(self, **kwargs):
"""
Initialize preprocessor with configuration parameters.
Parameters:
- **kwargs: Preprocessor-specific configuration options
"""
def __call__(self, data):
"""
Process input data.
Parameters:
- data: Input data to preprocess
Returns:
Preprocessed data ready for model input
"""
def forward(self, data):
"""
Forward pass through preprocessor.
Parameters:
- data: Input data
Returns:
Processed data
"""Factory function for creating preprocessors from configuration.
def build_preprocessor(cfg: dict, default_args: dict = None):
"""
Build preprocessor from configuration dictionary.
Parameters:
- cfg: Preprocessor configuration dictionary
- default_args: Default arguments to merge
Returns:
Preprocessor instance
"""
# Preprocessor registry
PREPROCESSORS: dict  # Registry of available preprocessor types
class Compose(Preprocessor):
"""
Chain multiple preprocessors together.
"""
def __init__(self, transforms: list, **kwargs):
"""
Initialize composition preprocessor.
Parameters:
- transforms: List of preprocessor instances to chain
"""
class Filter(Preprocessor):
"""
Data filtering preprocessor.
"""
def __init__(self, filter_fn, **kwargs):
"""
Initialize filter preprocessor.
Parameters:
- filter_fn: Function to determine which data to keep
"""class ToTensor(Preprocessor):
"""
Convert data to tensor format.
"""
def __init__(self, **kwargs):
"""Initialize tensor conversion preprocessor."""class LoadImage(Preprocessor):
"""
Image loading utility preprocessor.
"""
def __init__(self, color_type: str = 'color', **kwargs):
"""
Initialize image loader.
Parameters:
- color_type: Image color format ('color', 'grayscale')
"""
def load_image(path: str, color_type: str = 'color') -> Image:
"""
Load image from file path.
Parameters:
- path: Path to image file
- color_type: Color format for loading
Returns:
Loaded image object
"""class ImageColorEnhanceFinetunePreprocessor(Preprocessor):
"""
Preprocessor for image color enhancement fine-tuning tasks.
"""
def __init__(self, **kwargs):
"""Initialize color enhancement preprocessor."""
class ImageDenoisePreprocessor(Preprocessor):
"""
Preprocessor for image denoising tasks.
"""
def __init__(self, **kwargs):
"""Initialize denoising preprocessor."""
class ImageDeblurPreprocessor(Preprocessor):
"""
Preprocessor for image deblurring tasks.
"""
def __init__(self, **kwargs):
"""Initialize deblurring preprocessor."""
class ImageRestorationPreprocessor(Preprocessor):
"""
General image restoration preprocessor.
"""
def __init__(self, **kwargs):
"""Initialize restoration preprocessor."""class ImageClassificationMmcvPreprocessor(Preprocessor):
"""
MMCV-based preprocessor for image classification.
"""
def __init__(self, **kwargs):
"""Initialize MMCV image classification preprocessor."""
class ImageInstanceSegmentationPreprocessor(Preprocessor):
"""
Preprocessor for instance segmentation tasks.
"""
def __init__(self, **kwargs):
"""Initialize instance segmentation preprocessor."""
class ControllableImageGenerationPreprocessor(Preprocessor):
"""
Preprocessor for controllable image generation tasks.
"""
def __init__(self, **kwargs):
"""Initialize controllable generation preprocessor."""class TextClassificationTransformersPreprocessor(Preprocessor):
"""
Transformer-based preprocessor for text classification.
"""
def __init__(self, model_dir: str, **kwargs):
"""
Initialize text classification preprocessor.
Parameters:
- model_dir: Directory containing tokenizer and model files
"""
class TokenClassificationTransformersPreprocessor(Preprocessor):
"""
Transformer-based preprocessor for token classification (NER, POS tagging).
"""
def __init__(self, model_dir: str, **kwargs):
"""
Initialize token classification preprocessor.
Parameters:
- model_dir: Directory containing model and tokenizer
"""
class TextGenerationTransformersPreprocessor(Preprocessor):
"""
Preprocessor for text generation tasks.
"""
def __init__(self, model_dir: str, **kwargs):
"""
Initialize text generation preprocessor.
Parameters:
- model_dir: Model directory path
"""
class SentenceEmbeddingTransformersPreprocessor(Preprocessor):
"""
Preprocessor for sentence embedding tasks.
"""
def __init__(self, model_dir: str, **kwargs):
"""
Initialize sentence embedding preprocessor.
Parameters:
- model_dir: Model directory path
"""
class FillMaskTransformersPreprocessor(Preprocessor):
"""
Preprocessor for fill-mask (masked language modeling) tasks.
"""
def __init__(self, model_dir: str, **kwargs):
"""
Initialize fill-mask preprocessor.
Parameters:
- model_dir: Model directory path
"""class Tokenize(Preprocessor):
"""
Basic tokenization preprocessor.
"""
def __init__(self, tokenizer_type: str = 'basic', **kwargs):
"""
Initialize tokenizer.
Parameters:
- tokenizer_type: Type of tokenizer to use
"""
class SentencePiecePreprocessor(Preprocessor):
"""
SentencePiece tokenization preprocessor.
"""
def __init__(self, model_file: str, **kwargs):
"""
Initialize SentencePiece preprocessor.
Parameters:
- model_file: Path to SentencePiece model file
"""class LinearAECAndFbank(Preprocessor):
"""
Linear Acoustic Echo Cancellation and Filter Bank feature extraction.
"""
def __init__(self, **kwargs):
"""Initialize AEC and filter bank preprocessor."""
class AudioBrainPreprocessor(Preprocessor):
"""
AudioBrain-based preprocessing for speech tasks.
"""
def __init__(self, **kwargs):
"""Initialize AudioBrain preprocessor."""class WavToScp(Preprocessor):
"""
Convert WAV files to SCP (Kaldi script) format.
"""
def __init__(self, **kwargs):
"""Initialize WAV to SCP converter."""
class WavToLists(Preprocessor):
"""
Convert WAV files to list format for batch processing.
"""
def __init__(self, **kwargs):
"""Initialize WAV to lists converter."""
class KanttsDataPreprocessor(Preprocessor):
"""
Preprocessor for Kantts text-to-speech data preparation.
"""
def __init__(self, **kwargs):
"""Initialize Kantts data preprocessor."""class OfaPreprocessor(Preprocessor):
"""
Preprocessor for OFA (One For All) multi-modal model.
"""
def __init__(self, **kwargs):
"""Initialize OFA preprocessor."""
class MPlugPreprocessor(Preprocessor):
"""
Preprocessor for MPlug multi-modal model.
"""
def __init__(self, **kwargs):
"""Initialize MPlug preprocessor."""
class HiTeAPreprocessor(Preprocessor):
"""
Preprocessor for HiTeA (Hierarchical Text-Image) model.
"""
def __init__(self, **kwargs):
"""Initialize HiTeA preprocessor."""
class MplugOwlPreprocessor(Preprocessor):
"""
Preprocessor for MplugOwl multi-modal model.
"""
def __init__(self, **kwargs):
"""Initialize MplugOwl preprocessor."""class DiffusionImageGenerationPreprocessor(Preprocessor):
"""
Preprocessor for diffusion-based image generation.
"""
def __init__(self, **kwargs):
"""Initialize diffusion generation preprocessor."""
class ImageCaptioningClipInterrogatorPreprocessor(Preprocessor):
"""
CLIP-based preprocessor for image captioning tasks.
"""
def __init__(self, **kwargs):
"""Initialize CLIP interrogator preprocessor."""class ReadVideoData(Preprocessor):
"""
Video data reading and preprocessing.
"""
def __init__(self, **kwargs):
"""Initialize video data reader."""
class MovieSceneSegmentationPreprocessor(Preprocessor):
"""
Preprocessor for movie scene segmentation tasks.
"""
def __init__(self, **kwargs):
"""Initialize scene segmentation preprocessor."""from modelscope import Preprocessor, LoadImage, ToTensor, Compose
# Single preprocessor
image_loader = LoadImage(color_type='color')
image = image_loader('path/to/image.jpg')
# Compose multiple preprocessors
pipeline = Compose([
LoadImage(color_type='color'),
ToTensor()
])
processed_image = pipeline('path/to/image.jpg')
from modelscope import TextClassificationTransformersPreprocessor
# Initialize text preprocessor
preprocessor = TextClassificationTransformersPreprocessor(
model_dir='damo/nlp_structbert_sentence-similarity_chinese'
)
# Process text data
text = "这是一个文本分类的例子"
processed = preprocessor(text)
print(f"Processed text: {processed}")
# Batch processing
texts = ["文本1", "文本2", "文本3"]
batch_processed = preprocessor(texts)
from modelscope import Preprocessor
class CustomTextPreprocessor(Preprocessor):
def __init__(self, max_length=512, **kwargs):
super().__init__(**kwargs)
self.max_length = max_length
def __call__(self, data):
# Custom preprocessing logic
if isinstance(data, str):
# Truncate text
data = data[:self.max_length]
# Add special tokens
data = f"[CLS] {data} [SEP]"
return data
# Use custom preprocessor
custom_prep = CustomTextPreprocessor(max_length=256)
result = custom_prep("这是一个很长的文本示例...")
from modelscope import LoadImage, ImageClassificationMmcvPreprocessor
# Load and preprocess image for classification
image_path = 'path/to/image.jpg'
# Method 1: Direct loading
image = LoadImage(color_type='color')(image_path)
# Method 2: Classification-specific preprocessing
classifier_prep = ImageClassificationMmcvPreprocessor()
processed_image = classifier_prep(image_path)
from modelscope import LinearAECAndFbank, WavToScp
# Audio feature extraction
audio_preprocessor = LinearAECAndFbank()
features = audio_preprocessor('path/to/audio.wav')
# Convert to SCP format
wav_converter = WavToScp()
scp_data = wav_converter('path/to/audio.wav')
from modelscope import OfaPreprocessor
# Multi-modal preprocessing for OFA model
ofa_prep = OfaPreprocessor()
# Process image-text pair
result = ofa_prep({
'image': 'path/to/image.jpg',
'text': '描述这张图片'
})
from modelscope import build_preprocessor
# Build preprocessor from configuration
prep_config = {
'type': 'TextClassificationTransformersPreprocessor',
'model_dir': 'damo/nlp_structbert_base_chinese',
'max_length': 512,
'padding': True,
'truncation': True
}
preprocessor = build_preprocessor(prep_config)
result = preprocessor("输入文本")
from modelscope import Compose, LoadImage, ToTensor
# Create preprocessing pipeline
image_pipeline = Compose([
LoadImage(color_type='color'),
# Custom resize function could be added here
ToTensor()
])
# Process single image
processed = image_pipeline('image.jpg')
# Process batch of images
image_paths = ['img1.jpg', 'img2.jpg', 'img3.jpg']
batch_processed = [image_pipeline(path) for path in image_paths]
Install with Tessl CLI
npx tessl i tessl/pypi-modelscope