# File Formats

Support for multiple file formats through tablib integration, including CSV, XLSX, JSON, YAML, and more, with configurable format options.

## Capabilities

### Base Format Classes

Foundation classes for all file format implementations.

```python { .api }
11
class Format:
    """Interface that every file format implementation provides."""

    def get_title(self):
        """
        Get display title for the format.

        Returns:
        str, format display name
        """

    def create_dataset(self, in_stream):
        """
        Create tablib Dataset from input stream.

        Parameters:
        - in_stream: Input data stream

        Returns:
        tablib.Dataset instance
        """

    def export_data(self, dataset, **kwargs):
        """
        Export dataset to format-specific representation.

        Parameters:
        - dataset: tablib.Dataset to export
        - **kwargs: Format-specific export options

        Returns:
        Formatted export data (bytes or string)
        """

    def is_binary(self):
        """
        Check if format produces binary output.

        Returns:
        bool, True if format is binary
        """

    def get_read_mode(self):
        """
        Get file read mode for this format.

        Returns:
        str, file mode ('rb' or 'r')
        """

    def get_extension(self):
        """
        Get file extension for this format.

        Returns:
        str, file extension (e.g., 'csv', 'xlsx')
        """

    def get_content_type(self):
        """
        Get MIME content type for this format.

        Returns:
        str, MIME content type
        """
74
75
class TablibFormat(Format):
    """Base class for tablib-integrated formats."""

    # Identifier of the tablib format backing this class; subclasses set it.
    TABLIB_MODULE = None

    def create_dataset(self, in_stream):
        """Create dataset using tablib's format-specific loader."""

    def export_data(self, dataset, **kwargs):
        """Export using tablib's format-specific exporter."""

    def get_title(self):
        """Get title from tablib format."""
88
89
class TextFormat(TablibFormat):
    """Base class for text-based formats."""

    def is_binary(self):
        """Text formats are not binary."""
        return False

    def get_read_mode(self):
        """Text formats use text read mode."""
        return 'r'
99
```

### Text Formats

Implementations for text-based file formats.

```python { .api }
106
class CSV(TextFormat):
    """Comma-separated values format."""

    TABLIB_MODULE = 'csv'

    def get_extension(self):
        """Return the file extension, 'csv'."""
        return 'csv'

    def get_content_type(self):
        """Return the MIME type, 'text/csv'."""
        return 'text/csv'
116
117
class TSV(TextFormat):
    """Tab-separated values format."""

    TABLIB_MODULE = 'tsv'

    def get_extension(self):
        """Return the file extension, 'tsv'."""
        return 'tsv'

    def get_content_type(self):
        """Return the MIME type, 'text/tab-separated-values'."""
        return 'text/tab-separated-values'
127
128
class JSON(TextFormat):
    """JavaScript Object Notation format."""

    TABLIB_MODULE = 'json'

    def get_extension(self):
        """Return the file extension, 'json'."""
        return 'json'

    def get_content_type(self):
        """Return the MIME type, 'application/json'."""
        return 'application/json'
138
139
class YAML(TextFormat):
    """YAML Ain't Markup Language format."""

    TABLIB_MODULE = 'yaml'

    def get_extension(self):
        """Return the file extension, 'yaml'."""
        return 'yaml'

    def get_content_type(self):
        """Return the MIME type, 'application/x-yaml'."""
        return 'application/x-yaml'
149
150
class HTML(TextFormat):
    """HyperText Markup Language table format."""

    TABLIB_MODULE = 'html'

    def get_extension(self):
        """Return the file extension, 'html'."""
        return 'html'

    def get_content_type(self):
        """Return the MIME type, 'text/html'."""
        return 'text/html'
160
161
class ODS(TextFormat):
    """OpenDocument Spreadsheet format."""

    # NOTE(review): .ods files are zipped containers, yet this class inherits
    # is_binary() == False and read mode 'r' from TextFormat -- confirm that
    # this mirrors the library before relying on those two values.
    TABLIB_MODULE = 'ods'

    def get_extension(self):
        """Return the file extension, 'ods'."""
        return 'ods'

    def get_content_type(self):
        """Return the OpenDocument spreadsheet MIME type."""
        return 'application/vnd.oasis.opendocument.spreadsheet'
171
```

### Binary Formats

Implementations for binary file formats.

```python { .api }
178
class XLS(TablibFormat):
    """Microsoft Excel 97-2003 format."""

    TABLIB_MODULE = 'xls'

    def is_binary(self):
        """XLS output is binary."""
        return True

    def get_extension(self):
        """Return the file extension, 'xls'."""
        return 'xls'

    def get_content_type(self):
        """Return the legacy Excel MIME type."""
        return 'application/vnd.ms-excel'
191
192
class XLSX(TablibFormat):
    """Microsoft Excel 2007+ format."""

    TABLIB_MODULE = 'xlsx'

    def is_binary(self):
        """XLSX output is binary."""
        return True

    def get_extension(self):
        """Return the file extension, 'xlsx'."""
        return 'xlsx'

    def get_content_type(self):
        """Return the Office Open XML spreadsheet MIME type."""
        return 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
205
```

### Format Utilities

Utility functions for working with formats.

```python { .api }
212
def get_format_class(format_name, file_name=None, encoding=None):
    """
    Get format class from format name or file extension.

    Parameters:
    - format_name: str, format name or dotted path to format class
    - file_name: str, optional filename to infer format from extension
    - encoding: str, optional encoding for text formats

    Returns:
    Format class instance
    """
224
225
def get_default_format_names():
    """
    Get list of default format names.

    Returns:
    List of available format names
    """
232
233
# Available formats
DEFAULT_FORMATS = [CSV, XLSX, TSV, ODS, JSON, YAML, HTML, XLS]
# Subset of the formats above whose is_binary() returns True
BINARY_FORMATS = [XLS, XLSX]
236
```

## Usage Examples

### Basic Format Usage

```python
243
from import_export.formats.base_formats import CSV, XLSX, JSON
244
from import_export import resources
245
246
class BookResource(resources.ModelResource):
    """Resource exposing the Book model for import and export."""

    class Meta:
        model = Book
249
250
# Export to different formats
resource = BookResource()
dataset = resource.export()

# CSV export
csv_format = CSV()
csv_data = csv_format.export_data(dataset)

# Excel export
xlsx_format = XLSX()
xlsx_data = xlsx_format.export_data(dataset)

# JSON export
json_format = JSON()
json_data = json_format.export_data(dataset)
265
```

### Import from Different Formats

```python
270
from import_export.formats.base_formats import CSV, JSON
271
import tablib
272
273
# Import from CSV
csv_format = CSV()
with open('books.csv', 'r') as f:
    dataset = csv_format.create_dataset(f.read())

# Import from JSON
json_format = JSON()
with open('books.json', 'r') as f:
    dataset = json_format.create_dataset(f.read())

# Process import (dry_run=True is passed so this is a trial run)
resource = BookResource()
result = resource.import_data(dataset, dry_run=True)
286
```

### Custom Format Implementation

```python
291
from import_export.formats.base_formats import TextFormat
292
import xml.etree.ElementTree as ET
293
import tablib
294
295
class XMLFormat(TextFormat):
    """Custom XML format implementation.

    Exports a ``<data>`` root holding one ``<item>`` element per row, with
    one child element per column. Imports read the ``<item>`` children of
    the document root.
    """

    def get_title(self):
        """Return the format title, 'xml'."""
        return 'xml'

    def get_extension(self):
        """Return the file extension, 'xml'."""
        return 'xml'

    def get_content_type(self):
        """Return the MIME type, 'application/xml'."""
        return 'application/xml'

    def create_dataset(self, in_stream):
        """
        Parse XML and create dataset.

        Parameters:
        - in_stream: XML document content, handed to ET.fromstring

        Returns:
        tablib.Dataset with one column per distinct child tag, in order of
        first appearance; a tag missing from an item yields ''
        """
        root = ET.fromstring(in_stream)

        # Keep each item as a dict until every item has been seen. A later
        # item may introduce a tag the earlier ones lacked; building the
        # row lists eagerly would leave those earlier rows shorter than the
        # final header list.
        headers = []
        records = []

        for item in root.findall('item'):
            record = {}
            for child in item:
                if child.tag not in headers:
                    headers.append(child.tag)
                record[child.tag] = child.text
            records.append(record)

        dataset = tablib.Dataset()
        dataset.headers = headers
        for record in records:
            dataset.append([record.get(h, '') for h in headers])

        return dataset

    def export_data(self, dataset, **kwargs):
        """
        Export dataset to XML.

        Parameters:
        - dataset: tablib.Dataset to export; its headers become element names
        - **kwargs: accepted for interface compatibility, not used here

        Returns:
        str, the serialized XML document
        """
        root = ET.Element('data')

        for row in dataset:
            item = ET.SubElement(root, 'item')
            for header, value in zip(dataset.headers, row):
                field = ET.SubElement(item, header)
                # None becomes an empty element rather than the text "None"
                field.text = str(value) if value is not None else ''

        return ET.tostring(root, encoding='unicode')
341
```

### Dynamic Format Selection

```python
346
from import_export.command_utils import get_format_class
347
348
def export_with_format(resource, format_name, filename=None):
    """
    Export resource data with specified format.

    Parameters:
    - resource: object whose export() returns the dataset to write
    - format_name: str, format name handed to get_format_class
    - filename: str, optional output path, also handed to get_format_class;
      when empty, "export.<extension>" is used

    Returns:
    str, name of the file that was written
    """
    # Get format dynamically; the returned object is used as an instance
    file_format = get_format_class(format_name, filename)

    # Export data
    dataset = resource.export()
    export_data = file_format.export_data(dataset)

    # Determine filename if not provided
    if not filename:
        extension = file_format.get_extension()
        filename = f"export.{extension}"

    # Write to file
    if file_format.is_binary():
        with open(filename, 'wb') as f:
            f.write(export_data)
    else:
        # newline='' writes the exporter's line endings untouched; the
        # default text mode would turn "\r\n" into "\r\r\n" on Windows.
        with open(filename, 'w', newline='') as f:
            f.write(export_data)

    return filename
369
370
# Usage
resource = BookResource()
export_with_format(resource, 'CSV', 'books.csv')
export_with_format(resource, 'XLSX', 'books.xlsx')
374
```

### Format-Specific Options

```python
379
class CustomCSVFormat(CSV):
    """CSV format with custom options."""

    def export_data(self, dataset, **kwargs):
        """
        Export dataset as CSV with a configurable delimiter and quote char.

        Parameters:
        - dataset: tablib.Dataset to export
        - **kwargs: 'delimiter' (default ',') and 'quotechar' (default '"');
          any other keyword is ignored

        Returns:
        Result of dataset.export('csv', ...)
        """
        # Custom CSV export options
        delimiter = kwargs.get('delimiter', ',')
        quotechar = kwargs.get('quotechar', '"')

        # Use tablib's CSV export with custom options
        return dataset.export('csv', delimiter=delimiter, quotechar=quotechar)
389
390
# Usage with custom options
resource = BookResource()
dataset = resource.export()
csv_format = CustomCSVFormat()
csv_data = csv_format.export_data(
    dataset,
    delimiter=';',
    quotechar="'"
)
399
```

### Format Registry Pattern

```python
404
class FormatRegistry:
    """Registry for managing format classes.

    Names are lower-cased on both registration and lookup, so matching is
    case-insensitive.
    """

    def __init__(self):
        # Maps lower-cased format name -> registered format class
        self._formats = {}

    def register(self, name, format_class):
        """Register a format class under ``name``, replacing any earlier entry."""
        self._formats[name.lower()] = format_class

    def get_format(self, name):
        """Get format class by name, or None when nothing is registered."""
        return self._formats.get(name.lower())

    def list_formats(self):
        """List available format names, lower-cased, in registration order."""
        return list(self._formats.keys())
421
422
# Create global registry
format_registry = FormatRegistry()

# Register default formats
format_registry.register('csv', CSV)
format_registry.register('xlsx', XLSX)
format_registry.register('json', JSON)
format_registry.register('yaml', YAML)

# Register custom format
format_registry.register('xml', XMLFormat)

# Usage
format_class = format_registry.get_format('csv')
available_formats = format_registry.list_formats()
437
```

### Conditional Format Selection

```python
442
def choose_format_by_size(dataset, prefer_binary=True):
    """
    Choose format based on dataset size.

    Parameters:
    - dataset: sized object with a ``headers`` attribute (e.g. tablib.Dataset)
    - prefer_binary: bool, pick XLSX rather than CSV for large datasets

    Returns:
    A format instance: XLSX or CSV for more than 10000 rows or more than
    50 columns, JSON for fewer than 100 rows, CSV otherwise
    """
    row_count = len(dataset)
    col_count = len(dataset.headers) if dataset.headers else 0

    # For large datasets, prefer binary formats
    if row_count > 10000 or col_count > 50:
        return XLSX() if prefer_binary else CSV()

    # For small datasets, prefer text formats
    if row_count < 100:
        return JSON()

    # Default to CSV for medium datasets
    return CSV()
458
459
# Usage
resource = BookResource()
dataset = resource.export()
# choose_format_by_size returns a format instance, ready to export with
format_class = choose_format_by_size(dataset)
export_data = format_class.export_data(dataset)
464
```

### Format Validation

```python
469
def validate_format_support(format_name):
    """
    Validate that format is supported.

    Parameters:
    - format_name: str, format name handed to get_format_class

    Returns:
    tuple (bool, str): whether the format can be used, and a message
    explaining the outcome
    """
    try:
        file_format = get_format_class(format_name)

        # Check if required dependencies are available. Formats with no
        # tablib module (attribute missing or None) have nothing to probe.
        module_name = getattr(file_format, 'TABLIB_MODULE', None)
        if module_name:
            # Accept a bare key ('xlsx') as well as a dotted module path
            # ending in a private module name ('..._xlsx').
            format_key = module_name.rsplit('.', 1)[-1].lstrip('_')
            try:
                from tablib.exceptions import UnsupportedFormat
                from tablib.formats import registry
            except ImportError:
                return False, f"Format {format_name} requires additional dependencies"
            try:
                # The registry is queried through get_format(); formats are
                # not exposed as attributes of the registry object.
                registry.get_format(format_key)
            except (ImportError, UnsupportedFormat):
                return False, f"Format {format_name} requires additional dependencies"

        return True, f"Format {format_name} is supported"

    except Exception as e:
        # Deliberate catch-all: this helper reports failure, never raises
        return False, f"Format {format_name} is not supported: {e}"
489
490
# Usage
supported, message = validate_format_support('XLSX')
if supported:
    print(f"✓ {message}")
else:
    print(f"✗ {message}")
496
```

### Multi-Format Export

```python
501
def export_to_multiple_formats(resource, formats, base_filename):
    """
    Export resource to multiple formats.

    Parameters:
    - resource: object whose export() returns the dataset to write
    - formats: iterable of format names handed to get_format_class
    - base_filename: str, output path without extension

    Returns:
    dict mapping each format name to a result dict: 'filename', 'success'
    and 'size' (len() of the exported data) on success, or 'filename'
    (None), 'success' and 'error' on failure
    """
    dataset = resource.export()
    results = {}

    for format_name in formats:
        try:
            file_format = get_format_class(format_name)
            extension = file_format.get_extension()
            filename = f"{base_filename}.{extension}"

            export_data = file_format.export_data(dataset)

            if file_format.is_binary():
                with open(filename, 'wb') as f:
                    f.write(export_data)
            else:
                # newline='' writes the exporter's line endings untouched;
                # the default text mode would turn "\r\n" into "\r\r\n" on
                # Windows.
                with open(filename, 'w', newline='') as f:
                    f.write(export_data)

            results[format_name] = {
                'filename': filename,
                'success': True,
                'size': len(export_data)
            }

        except Exception as e:
            # Best effort: one failing format must not abort the others
            results[format_name] = {
                'filename': None,
                'success': False,
                'error': str(e)
            }

    return results
533
534
# Usage
resource = BookResource()
results = export_to_multiple_formats(
    resource,
    ['CSV', 'XLSX', 'JSON'],
    'books_export'
)

for format_name, result in results.items():
    if result['success']:
        print(f"✓ {format_name}: {result['filename']} ({result['size']} bytes)")
    else:
        print(f"✗ {format_name}: {result['error']}")
547
```