Generates Python data models from various schema formats including OpenAPI, JSON Schema, GraphQL, and raw data
npx @tessl/cli install tessl/pypi-datamodel-code-generator@0.33.0

A comprehensive Python library and CLI tool that automatically generates Python data models from various structured input formats. It transforms OpenAPI schemas, JSON Schema, JSON/YAML/CSV data, Python dictionaries, and GraphQL schemas into ready-to-use Python data structures including Pydantic BaseModel (v1 and v2), dataclasses, TypedDict, and msgspec.Struct types.
pip install datamodel-code-generator

Optional extras:
- [http] for remote schema fetching
- [graphql] for GraphQL schema support
- [validation] for OpenAPI validation
- [debug] for debugging features
- [ruff] for Ruff code formatting

Primary programmatic entry point:

from datamodel_code_generator import generate

For CLI usage:
datamodel-codegen --input schema.yaml --output models.py

Common enums and types:
from datamodel_code_generator import (
DataModelType,
InputFileType,
PythonVersion,
Error,
OpenAPIScope,
GraphQLScope
)

Format module imports:
from datamodel_code_generator.format import (
DatetimeClassType,
Formatter,
PythonVersionMin,
DEFAULT_FORMATTERS
)

# Generate from OpenAPI schema
datamodel-codegen --input api.yaml --output models.py
# Generate Pydantic v2 models
datamodel-codegen --input schema.json --output models.py --output-model-type pydantic_v2.BaseModel
# Generate dataclasses
datamodel-codegen --input data.json --output models.py --output-model-type dataclasses.dataclass

from datamodel_code_generator import generate, DataModelType, InputFileType
from pathlib import Path
# Generate from file: input format is auto-detected, Pydantic v2 models are emitted
generate(
    input_=Path("api.yaml"),
    output=Path("models.py"),
    output_model_type=DataModelType.PydanticV2BaseModel
)

# Generate from string: input_file_type must be given explicitly for raw text
schema_text = '''
{
"type": "object",
"properties": {
"name": {"type": "string"},
"age": {"type": "integer"}
}
}
'''
generate(
    input_=schema_text,
    input_file_type=InputFileType.JsonSchema,
    output=Path("models.py")
)

The code generator follows a modular architecture with three main components:
This design enables extensibility through custom templates, formatters, and model types while maintaining consistency across different input and output formats.
The primary programmatic interface for generating Python data models with extensive configuration options for input sources, output formats, and code generation behavior.
def generate(
    input_: Path | str | ParseResult | Mapping[str, Any],
    *,
    input_filename: str | None = None,
    input_file_type: InputFileType = InputFileType.Auto,
    output: Path | None = None,
    output_model_type: DataModelType = DataModelType.PydanticBaseModel,
    target_python_version: PythonVersion = PythonVersionMin,
    **kwargs
) -> None:
    """
    Generate Python data models from input schema.

    Args:
        input_: Input source (file path, URL, string content, or dict)
        input_filename: Name for the input file (for metadata)
        input_file_type: Type of input format (auto-detected if Auto)
        output: Output file path (None for stdout)
        output_model_type: Python model type to generate
        target_python_version: Target Python version for compatibility
        **kwargs: 70+ additional configuration parameters
    """

Command-line interface providing access to all generation features with argument parsing and user-friendly options.
def main() -> None:
    """Main CLI entry point accessed via 'datamodel-codegen' command."""

Automatic detection of input schema formats with support for multiple structured data types.
def infer_input_type(text: str) -> InputFileType:
    """
    Automatically detect input file type from content.

    Args:
        text: Input text content

    Returns:
        Detected InputFileType enum value
    """
def is_openapi(data: dict) -> bool:
    """Check if dictionary contains OpenAPI specification.

    Args:
        data: Parsed schema document as a dictionary.
    """
def is_schema(data: dict) -> bool:
    """Check if dictionary contains JSON Schema.

    Args:
        data: Parsed schema document as a dictionary.
    """

Essential enums and constants for configuring generation behavior.
class InputFileType(Enum):
    """Supported input schema formats."""

    Auto = "auto"          # detect the format from content (see infer_input_type)
    OpenAPI = "openapi"    # OpenAPI specification
    JsonSchema = "jsonschema"  # JSON Schema document
    Json = "json"          # raw JSON data sample
    Yaml = "yaml"          # raw YAML data sample
    Dict = "dict"          # Python dictionary input
    CSV = "csv"            # CSV data sample
    GraphQL = "graphql"    # GraphQL schema
class DataModelType(Enum):
    """Supported output Python model types (value is the generated base type's dotted path)."""

    PydanticBaseModel = "pydantic.BaseModel"        # Pydantic v1 models
    PydanticV2BaseModel = "pydantic_v2.BaseModel"   # Pydantic v2 models
    DataclassesDataclass = "dataclasses.dataclass"  # stdlib dataclasses
    TypingTypedDict = "typing.TypedDict"            # TypedDict definitions
    MsgspecStruct = "msgspec.Struct"                # msgspec Struct types
class OpenAPIScope(Enum):
    """OpenAPI parsing scope options (which parts of the document to generate models from)."""

    Schemas = "schemas"
    Paths = "paths"
    Tags = "tags"
    Parameters = "parameters"

Helper functions for YAML processing, version info, and directory management.
def get_version() -> str:
    """Get package version string."""
def load_yaml(stream: str | TextIO) -> Any:
    """Load YAML from string or stream.

    Args:
        stream: YAML source as a string or an open text stream.
    """
def load_yaml_from_path(path: Path, encoding: str) -> Any:
    """Load YAML from file path.

    Args:
        path: File to read.
        encoding: Text encoding used to decode the file.
    """
@contextmanager
def chdir(path: Path | None) -> Iterator[None]:
    """Context manager for temporary directory changes.

    Args:
        path: Directory to change into for the duration of the context.
            NOTE(review): None presumably means "no directory change" — confirm
            against the implementation.
    """

Exception classes for error handling and validation.
class Error(Exception):
    """Base exception class for datamodel-code-generator."""

    def __init__(self, message: str) -> None: ...
class InvalidClassNameError(Error):
    """Raised when generated class names are invalid."""

    def __init__(self, class_name: str) -> None: ...

Utility Functions and Error Handling
# Version constants: supported target Python minor-version range
MIN_VERSION: Final[int] = 9  # Python 3.9
MAX_VERSION: Final[int] = 13  # Python 3.13

# Default values
DEFAULT_BASE_CLASS: str = "pydantic.BaseModel"

# Schema detection constants: URL prefixes that identify JSON Schema documents
JSON_SCHEMA_URLS: tuple[str, ...] = (
    "http://json-schema.org/",
    "https://json-schema.org/",
)

# Raw data type formats (input types parsed as data rather than as an
# OpenAPI/JSON Schema document)
RAW_DATA_TYPES: list[InputFileType] = [
    InputFileType.Json,
    InputFileType.Yaml,
    InputFileType.Dict,
    InputFileType.CSV,
    InputFileType.GraphQL,
]
# Type aliases and protocols from types module
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Imported only for static type checking; keeps these names out of the
    # runtime import graph.
    from collections import defaultdict
    from datamodel_code_generator.model.pydantic_v2 import UnionMode
    from datamodel_code_generator.parser.base import Parser
    from datamodel_code_generator.types import StrictTypes