Django application and library for importing and exporting data with included admin integration.
—
Support for multiple file formats through tablib integration, including CSV, XLSX, JSON, YAML, and more, with configurable format options.
Foundation classes for all file format implementations.
class Format:
    """Interface that every import/export file format exposes.

    The methods are listed as signatures only; their bodies are not part of
    this reference.
    """

    def get_title(self):
        """
        Return the name under which this format is displayed.
        Returns:
        str, format display name
        """

    def create_dataset(self, in_stream):
        """
        Build a tablib Dataset from the given input.
        Parameters:
        - in_stream: Input data stream
        Returns:
        tablib.Dataset instance
        """

    def export_data(self, dataset, **kwargs):
        """
        Serialize a dataset into this format's representation.
        Parameters:
        - dataset: tablib.Dataset to export
        - **kwargs: Format-specific export options
        Returns:
        Formatted export data (bytes or string)
        """

    def is_binary(self):
        """
        Tell whether this format's output is binary.
        Returns:
        bool, True if format is binary
        """

    def get_read_mode(self):
        """
        Return the mode in which files of this format are opened for reading.
        Returns:
        str, file mode ('rb' or 'r')
        """

    def get_extension(self):
        """
        Return the file extension used by this format.
        Returns:
        str, file extension (e.g., 'csv', 'xlsx')
        """

    def get_content_type(self):
        """
        Return the MIME content type of this format.
        Returns:
        str, MIME content type
        """
class TablibFormat(Format):
    """Common base for formats that are backed by tablib."""

    # Names the tablib format a subclass maps to; the base leaves it unset.
    TABLIB_MODULE = None

    def create_dataset(self, in_stream):
        """Load ``in_stream`` into a dataset with tablib's loader for this format."""

    def export_data(self, dataset, **kwargs):
        """Serialize ``dataset`` with tablib's exporter for this format."""

    def get_title(self):
        """Return the title taken from the tablib format."""
class TextFormat(TablibFormat):
    """Base class for text-based formats."""

    def is_binary(self):
        """Text formats are not binary, so this always returns False."""
        return False

    def get_read_mode(self):
        """Text formats are read in text mode, so this always returns 'r'."""
        return 'r'


# Implementations for text-based file formats.
class CSV(TextFormat):
    """Text format for comma-separated values."""

    TABLIB_MODULE = 'csv'

    def get_extension(self):
        """Return the 'csv' file extension."""
        return 'csv'

    def get_content_type(self):
        """Return the 'text/csv' MIME type."""
        return 'text/csv'
class TSV(TextFormat):
    """Text format for tab-separated values."""

    TABLIB_MODULE = 'tsv'

    def get_extension(self):
        """Return the 'tsv' file extension."""
        return 'tsv'

    def get_content_type(self):
        """Return the 'text/tab-separated-values' MIME type."""
        return 'text/tab-separated-values'
class JSON(TextFormat):
    """Text format for JavaScript Object Notation."""

    TABLIB_MODULE = 'json'

    def get_extension(self):
        """Return the 'json' file extension."""
        return 'json'

    def get_content_type(self):
        """Return the 'application/json' MIME type."""
        return 'application/json'
class YAML(TextFormat):
    """Text format for YAML (YAML Ain't Markup Language)."""

    TABLIB_MODULE = 'yaml'

    def get_extension(self):
        """Return the 'yaml' file extension."""
        return 'yaml'

    def get_content_type(self):
        """Return the 'application/x-yaml' MIME type."""
        return 'application/x-yaml'
class HTML(TextFormat):
    """Text format for HyperText Markup Language tables."""

    TABLIB_MODULE = 'html'

    def get_extension(self):
        """Return the 'html' file extension."""
        return 'html'

    def get_content_type(self):
        """Return the 'text/html' MIME type."""
        return 'text/html'
class ODS(TextFormat):
    """OpenDocument Spreadsheet format."""

    TABLIB_MODULE = 'ods'

    def get_extension(self):
        """Return the 'ods' file extension."""
        return 'ods'

    def get_content_type(self):
        """Return the OpenDocument spreadsheet MIME type."""
        return 'application/vnd.oasis.opendocument.spreadsheet'


# Implementations for binary file formats.
class XLS(TablibFormat):
    """Binary format for Microsoft Excel 97-2003 workbooks."""

    TABLIB_MODULE = 'xls'

    def is_binary(self):
        """Report that XLS output is binary."""
        return True

    def get_extension(self):
        """Return the 'xls' file extension."""
        return 'xls'

    def get_content_type(self):
        """Return the 'application/vnd.ms-excel' MIME type."""
        return 'application/vnd.ms-excel'
class XLSX(TablibFormat):
    """Microsoft Excel 2007+ format."""

    TABLIB_MODULE = 'xlsx'

    def is_binary(self):
        """Report that XLSX output is binary."""
        return True

    def get_extension(self):
        """Return the 'xlsx' file extension."""
        return 'xlsx'

    def get_content_type(self):
        """Return the Office Open XML spreadsheet MIME type."""
        return 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'


# Utility functions for working with formats.
def get_format_class(format_name, file_name=None, encoding=None):
    """
    Resolve a format from its name or from a file's extension.
    Parameters:
    - format_name: str, format name or dotted path to format class
    - file_name: str, optional filename to infer format from extension
    - encoding: str, optional encoding for text formats
    Returns:
    Format class instance
    """
def get_default_format_names():
    """
    Return the names of the default formats.
    Returns:
    List of available format names
    """
# Available formats
DEFAULT_FORMATS = [CSV, XLSX, TSV, ODS, JSON, YAML, HTML, XLS]
# The two formats above whose is_binary() returns True.
BINARY_FORMATS = [XLS, XLSX]

from import_export.formats.base_formats import CSV, XLSX, JSON
from import_export import resources
class BookResource(resources.ModelResource):
    """Model resource used by the examples that follow."""

    class Meta:
        # NOTE(review): Book is not imported in this snippet; presumably the
        # application's own model. Confirm against the surrounding project.
        model = Book
# Export to different formats
resource = BookResource()
dataset = resource.export()

# CSV export
csv_format = CSV()
csv_data = csv_format.export_data(dataset)

# Excel export
xlsx_format = XLSX()
xlsx_data = xlsx_format.export_data(dataset)

# JSON export
json_format = JSON()
json_data = json_format.export_data(dataset)

from import_export.formats.base_formats import CSV, JSON
import tablib
# Import from CSV
csv_format = CSV()
# newline='' passes the file's own line endings through to the CSV parser,
# so line breaks inside quoted fields are not rewritten while reading.
with open('books.csv', 'r', newline='') as f:
    dataset = csv_format.create_dataset(f.read())

# Import from JSON
json_format = JSON()
with open('books.json', 'r') as f:
    dataset = json_format.create_dataset(f.read())

# Process import (dry_run=True is passed, so this is a trial run)
resource = BookResource()
result = resource.import_data(dataset, dry_run=True)

from import_export.formats.base_formats import TextFormat
import xml.etree.ElementTree as ET
import tablib
class XMLFormat(TextFormat):
    """Custom XML format implementation.

    Exported documents have a ``data`` root element holding one ``item``
    element per row, with one child element per column.
    """

    def get_title(self):
        """Return the display title, 'xml'."""
        return 'xml'

    def get_extension(self):
        """Return the 'xml' file extension."""
        return 'xml'

    def get_content_type(self):
        """Return the 'application/xml' MIME type."""
        return 'application/xml'

    def create_dataset(self, in_stream):
        """
        Parse XML and create dataset.
        Parameters:
        - in_stream: str, XML document whose root contains ``item`` elements
        Returns:
        tablib.Dataset with one column per distinct child tag, in order of
        first appearance, and one row per ``item``
        """
        # NOTE(review): xml.etree is not hardened against malicious documents;
        # consider a hardened parser if in_stream can be untrusted.
        root = ET.fromstring(in_stream)

        # First pass: gather every item as a dict and discover all headers.
        # Rows are built only afterwards, so an item that introduces a new tag
        # cannot leave the earlier rows shorter than the header list.
        headers = []
        records = []
        for item in root.findall('item'):
            record = {}
            for child in item:
                if child.tag not in headers:
                    headers.append(child.tag)
                record[child.tag] = child.text
            records.append(record)

        dataset = tablib.Dataset()
        dataset.headers = headers
        for record in records:
            # Columns an item does not define are filled with ''.
            dataset.append([record.get(header, '') for header in headers])
        return dataset

    def export_data(self, dataset, **kwargs):
        """
        Export dataset to XML.
        Parameters:
        - dataset: tablib.Dataset to export
        - **kwargs: accepted for interface compatibility; not used
        Returns:
        str, the serialized XML document
        """
        root = ET.Element('data')
        for row in dataset:
            item = ET.SubElement(root, 'item')
            for header, value in zip(dataset.headers, row):
                field = ET.SubElement(item, header)
                # None is written as empty text rather than the string 'None'.
                field.text = str(value) if value is not None else ''
        return ET.tostring(root, encoding='unicode')


from import_export.command_utils import get_format_class
def export_with_format(resource, format_name, filename=None):
    """
    Export resource data with specified format.
    Parameters:
    - resource: resource whose export() result is written out
    - format_name: str, format name passed to get_format_class
    - filename: str, optional target path; defaults to 'export.<extension>'
    Returns:
    str, the path that was written
    """
    # Get format class dynamically
    format_class = get_format_class(format_name, filename)

    # Export data
    dataset = resource.export()
    export_data = format_class.export_data(dataset)

    # Determine filename if not provided
    if not filename:
        extension = format_class.get_extension()
        filename = f"export.{extension}"

    # Write to file
    if format_class.is_binary():
        with open(filename, 'wb') as f:
            f.write(export_data)
    else:
        # newline='' writes the exporter's line endings untouched; without it,
        # text mode on Windows turns CSV's '\r\n' into '\r\r\n'.
        with open(filename, 'w', newline='') as f:
            f.write(export_data)
    return filename
# Usage
resource = BookResource()
export_with_format(resource, 'CSV', 'books.csv')
export_with_format(resource, 'XLSX', 'books.xlsx')


class CustomCSVFormat(CSV):
    """CSV format with custom options."""

    def export_data(self, dataset, **kwargs):
        """
        Export dataset as CSV with a configurable delimiter and quote character.
        Parameters:
        - dataset: tablib.Dataset to export
        - **kwargs: 'delimiter' (default ',') and 'quotechar' (default '"');
          any other option is ignored
        Returns:
        The result of dataset.export('csv', ...)
        """
        # Custom CSV export options
        delimiter = kwargs.get('delimiter', ',')
        quotechar = kwargs.get('quotechar', '"')
        # Use tablib's CSV export with custom options
        return dataset.export('csv', delimiter=delimiter, quotechar=quotechar)
# Usage with custom options
resource = BookResource()
dataset = resource.export()
csv_format = CustomCSVFormat()
csv_data = csv_format.export_data(
    dataset,
    delimiter=';',
    quotechar="'"
)


class FormatRegistry:
    """Registry for managing format classes.

    Names are stored and looked up in lower case, so lookups are
    case-insensitive.
    """

    def __init__(self):
        # Maps lower-cased format name -> format class.
        self._formats = {}

    def register(self, name, format_class):
        """
        Register a format class, replacing any class already stored for name.
        Parameters:
        - name: str, format name (case-insensitive)
        - format_class: class to associate with the name
        """
        self._formats[name.lower()] = format_class

    def get_format(self, name):
        """
        Get format class by name.
        Parameters:
        - name: str, format name (case-insensitive)
        Returns:
        The registered class, or None when the name is unknown
        """
        return self._formats.get(name.lower())

    def list_formats(self):
        """
        List available format names.
        Returns:
        list of str, the registered names in lower case
        """
        return list(self._formats.keys())
# Create global registry
format_registry = FormatRegistry()

# Register default formats
format_registry.register('csv', CSV)
format_registry.register('xlsx', XLSX)
format_registry.register('json', JSON)
format_registry.register('yaml', YAML)

# Register custom format
format_registry.register('xml', XMLFormat)

# Usage
format_class = format_registry.get_format('csv')
available_formats = format_registry.list_formats()


def choose_format_by_size(dataset, prefer_binary=True):
    """
    Choose format based on dataset size.
    Parameters:
    - dataset: dataset supporting len() and exposing a headers attribute
    - prefer_binary: bool, whether large datasets get XLSX instead of CSV
    Returns:
    Format instance: XLSX or CSV for more than 10000 rows or more than 50
    columns, JSON for fewer than 100 rows, CSV otherwise
    """
    row_count = len(dataset)
    # headers may be empty or None, in which case there are no columns.
    col_count = len(dataset.headers) if dataset.headers else 0

    # For large datasets, prefer binary formats
    if row_count > 10000 or col_count > 50:
        return XLSX() if prefer_binary else CSV()

    # For small datasets, prefer text formats
    if row_count < 100:
        return JSON()

    # Default to CSV for medium datasets
    return CSV()
# Usage
resource = BookResource()
dataset = resource.export()
format_class = choose_format_by_size(dataset)
export_data = format_class.export_data(dataset)


def validate_format_support(format_name):
    """
    Validate that format is supported.
    Parameters:
    - format_name: str, format name passed to get_format_class
    Returns:
    tuple (bool, str): whether the format is usable, and a message saying why
    """
    try:
        format_class = get_format_class(format_name)

        # Check if required dependencies are available. Formats that leave
        # TABLIB_MODULE unset (None) do not rely on tablib and are not probed.
        module_name = getattr(format_class, 'TABLIB_MODULE', None)
        if module_name is not None:
            missing = f"Format {format_name} requires additional dependencies"
            try:
                import tablib
            except ImportError:
                return False, missing
            # Accept both a bare key ('csv') and a dotted module path
            # ('tablib.formats._csv') by reducing either to the registry key.
            key = module_name.split('.')[-1].replace('_', '')
            try:
                # The registry is queried through get_format(); formats are not
                # attributes of the registry object, so getattr() on it fails
                # for every format, installed or not.
                tablib.formats.registry.get_format(key)
            except (ImportError, tablib.UnsupportedFormat):
                return False, missing

        return True, f"Format {format_name} is supported"
    except Exception as e:
        # Any other failure (e.g. an unknown format name) is reported to the
        # caller in the message rather than raised.
        return False, f"Format {format_name} is not supported: {e}"
# Usage
supported, message = validate_format_support('xlsx')
if supported:
    print(f"✓ {message}")
else:
    print(f"✗ {message}")


def export_to_multiple_formats(resource, formats, base_filename):
    """
    Export resource to multiple formats.
    Parameters:
    - resource: resource whose export() result is written out
    - formats: iterable of str, format names passed to get_format_class
    - base_filename: str, path without extension; each format appends its own
    Returns:
    dict mapping each format name to a result dict: 'filename', 'success' and
    'size' on success, or 'filename' (None), 'success' and 'error' on failure
    """
    dataset = resource.export()
    results = {}
    for format_name in formats:
        try:
            format_class = get_format_class(format_name)
            extension = format_class.get_extension()
            filename = f"{base_filename}.{extension}"
            export_data = format_class.export_data(dataset)

            if format_class.is_binary():
                with open(filename, 'wb') as f:
                    f.write(export_data)
            else:
                # newline='' writes the exporter's line endings untouched;
                # without it, text mode on Windows turns CSV's '\r\n' into
                # '\r\r\n'.
                with open(filename, 'w', newline='') as f:
                    f.write(export_data)

            results[format_name] = {
                'filename': filename,
                'success': True,
                # len() of the exported payload: bytes for binary formats,
                # characters for text formats.
                'size': len(export_data)
            }
        except Exception as e:
            # Best effort: one failing format is recorded and the remaining
            # formats are still attempted.
            results[format_name] = {
                'filename': None,
                'success': False,
                'error': str(e)
            }
    return results
# Usage
resource = BookResource()
results = export_to_multiple_formats(
    resource,
    ['csv', 'xlsx', 'json'],
    'books_export'
)
for format_name, result in results.items():
    if result['success']:
        print(f"✓ {format_name}: {result['filename']} ({result['size']} bytes)")
    else:
        print(f"✗ {format_name}: {result['error']}")

# Install with Tessl CLI
#     npx tessl i tessl/pypi-django-import-export