or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

admin-integration.md · file-formats.md · forms-ui.md · index.md · management-commands.md · resources-fields.md · widgets-transformation.md

docs/file-formats.md

0

# File Formats

1

2

Support for multiple file formats through tablib integration, including CSV, XLSX, JSON, YAML, and more, with configurable format options.

3

4

## Capabilities

5

6

### Base Format Classes

7

8

Foundation classes for all file format implementations.

9

10

```python { .api }

11

class Format:
    """Interface shared by every import/export file format."""

    def get_title(self):
        """Return the display name of the format.

        Returns:
            str: Format display name.
        """

    def create_dataset(self, in_stream):
        """Build a tablib Dataset from an input stream.

        Args:
            in_stream: Input data stream.

        Returns:
            A ``tablib.Dataset`` instance.
        """

    def export_data(self, dataset, **kwargs):
        """Render a dataset in this format's representation.

        Args:
            dataset: The ``tablib.Dataset`` to export.
            **kwargs: Format-specific export options.

        Returns:
            The formatted export data (bytes or string).
        """

    def is_binary(self):
        """Tell whether the format produces binary output.

        Returns:
            bool: ``True`` if the format is binary.
        """

    def get_read_mode(self):
        """Return the mode in which files of this format are read.

        Returns:
            str: File mode (``'rb'`` or ``'r'``).
        """

    def get_extension(self):
        """Return the file extension used by this format.

        Returns:
            str: File extension (e.g. ``'csv'``, ``'xlsx'``).
        """

    def get_content_type(self):
        """Return the MIME content type of this format.

        Returns:
            str: MIME content type.
        """

74

75

class TablibFormat(Format):
    """Common base for formats that are integrated through tablib."""

    # Names the tablib format used by a concrete subclass; unset on the base.
    TABLIB_MODULE = None

    def create_dataset(self, in_stream):
        """Load ``in_stream`` into a dataset with tablib's format-specific loader."""

    def export_data(self, dataset, **kwargs):
        """Serialize ``dataset`` with tablib's format-specific exporter."""

    def get_title(self):
        """Return the title taken from the tablib format."""

88

89

class TextFormat(TablibFormat):
    """Common base for formats whose payload is text."""

    def is_binary(self):
        """Report that text formats never produce binary output."""
        return False

    def get_read_mode(self):
        """Return the text read mode used for these formats."""
        return 'r'

99

```

100

101

### Text Formats

102

103

Implementations for text-based file formats.

104

105

```python { .api }

106

class CSV(TextFormat):
    """Format for comma-separated values."""

    TABLIB_MODULE = 'csv'

    def get_extension(self):
        """Return the file extension for CSV files."""
        return 'csv'

    def get_content_type(self):
        """Return the MIME type for CSV content."""
        return 'text/csv'

116

117

class TSV(TextFormat):
    """Format for tab-separated values."""

    TABLIB_MODULE = 'tsv'

    def get_extension(self):
        """Return the file extension for TSV files."""
        return 'tsv'

    def get_content_type(self):
        """Return the MIME type for TSV content."""
        return 'text/tab-separated-values'

127

128

class JSON(TextFormat):
    """Format for JavaScript Object Notation."""

    TABLIB_MODULE = 'json'

    def get_extension(self):
        """Return the file extension for JSON files."""
        return 'json'

    def get_content_type(self):
        """Return the MIME type for JSON content."""
        return 'application/json'

138

139

class YAML(TextFormat):
    """Format for YAML (YAML Ain't Markup Language)."""

    TABLIB_MODULE = 'yaml'

    def get_extension(self):
        """Return the file extension for YAML files."""
        return 'yaml'

    def get_content_type(self):
        """Return the MIME type for YAML content."""
        return 'application/x-yaml'

149

150

class HTML(TextFormat):
    """Format for HyperText Markup Language tables."""

    TABLIB_MODULE = 'html'

    def get_extension(self):
        """Return the file extension for HTML files."""
        return 'html'

    def get_content_type(self):
        """Return the MIME type for HTML content."""
        return 'text/html'

160

161

class ODS(TextFormat):
    """Format for OpenDocument Spreadsheet files."""

    # NOTE(review): .ods files are ZIP containers, yet this class derives from
    # TextFormat (is_binary() -> False, read mode 'r') -- confirm this is
    # intended before relying on it to choose a file mode.
    TABLIB_MODULE = 'ods'

    def get_extension(self):
        """Return the file extension for ODS files."""
        return 'ods'

    def get_content_type(self):
        """Return the MIME type for ODS content."""
        return 'application/vnd.oasis.opendocument.spreadsheet'

171

```

172

173

### Binary Formats

174

175

Implementations for binary file formats.

176

177

```python { .api }

178

class XLS(TablibFormat):
    """Format for Microsoft Excel 97-2003 workbooks."""

    TABLIB_MODULE = 'xls'

    def is_binary(self):
        """Report that XLS output is binary."""
        return True

    def get_extension(self):
        """Return the file extension for XLS files."""
        return 'xls'

    def get_content_type(self):
        """Return the MIME type for XLS content."""
        return 'application/vnd.ms-excel'

191

192

class XLSX(TablibFormat):
    """Format for Microsoft Excel 2007+ workbooks."""

    TABLIB_MODULE = 'xlsx'

    def is_binary(self):
        """Report that XLSX output is binary."""
        return True

    def get_extension(self):
        """Return the file extension for XLSX files."""
        return 'xlsx'

    def get_content_type(self):
        """Return the MIME type for XLSX content."""
        return 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'

205

```

206

207

### Format Utilities

208

209

Utility functions for working with formats.

210

211

```python { .api }

212

def get_format_class(format_name, file_name=None, encoding=None):
    """Look up a format by name or by file extension.

    Args:
        format_name: str, a format name or the dotted path to a format class.
        file_name: str, optional file name whose extension is used to infer
            the format.
        encoding: str, optional encoding for text formats.

    Returns:
        A format class instance.
    """

224

225

def get_default_format_names():
    """Return the names of the default formats.

    Returns:
        A list of the available format names.
    """

232

233

# Formats available by default
DEFAULT_FORMATS = [
    CSV,
    XLSX,
    TSV,
    ODS,
    JSON,
    YAML,
    HTML,
    XLS,
]

# Subset of the formats above that is listed as binary
BINARY_FORMATS = [
    XLS,
    XLSX,
]

236

```

237

238

## Usage Examples

239

240

### Basic Format Usage

241

242

```python

243

from import_export.formats.base_formats import CSV, XLSX, JSON

244

from import_export import resources

245

246

class BookResource(resources.ModelResource):
    """Import/export resource bound to the ``Book`` model."""

    class Meta:
        # ``Book`` is the application's model; it must be importable here.
        model = Book

249

250

# Build one dataset and render it in several formats
resource = BookResource()
dataset = resource.export()

# One format object per target representation
csv_format, xlsx_format, json_format = CSV(), XLSX(), JSON()

csv_data = csv_format.export_data(dataset)    # CSV export
xlsx_data = xlsx_format.export_data(dataset)  # Excel export
json_data = json_format.export_data(dataset)  # JSON export

265

```

266

267

### Import from Different Formats

268

269

```python

270

from import_export.formats.base_formats import CSV, JSON

271

import tablib

272

273

# Import from CSV
# An explicit encoding keeps the result independent of the platform default.
csv_format = CSV()
with open('books.csv', 'r', encoding='utf-8') as f:
    dataset = csv_format.create_dataset(f.read())

# Import from JSON
# NOTE: this rebinds ``dataset``; the import below therefore processes the
# JSON data, not the CSV data loaded above.
json_format = JSON()
with open('books.json', 'r', encoding='utf-8') as f:
    dataset = json_format.create_dataset(f.read())

# Process import (dry_run=True is passed so this run is a trial)
resource = BookResource()
result = resource.import_data(dataset, dry_run=True)

286

```

287

288

### Custom Format Implementation

289

290

```python

291

from import_export.formats.base_formats import TextFormat

292

import xml.etree.ElementTree as ET

293

import tablib

294

295

class XMLFormat(TextFormat):
    """Custom XML format implementation.

    The document layout is a root element containing ``<item>`` children;
    each child element of an item is one field, with the tag as the column
    name and the element text as the value.
    """

    def get_title(self):
        """Return the display title of the format."""
        return 'xml'

    def get_extension(self):
        """Return the file extension for XML files."""
        return 'xml'

    def get_content_type(self):
        """Return the MIME type for XML content."""
        return 'application/xml'

    def create_dataset(self, in_stream):
        """Parse XML and create a dataset.

        Args:
            in_stream: The XML document as text or bytes (it is handed
                straight to ``ET.fromstring``).

        Returns:
            A ``tablib.Dataset`` whose headers are the field tags in order of
            first appearance, with one row per ``<item>`` element. A field
            that an item lacks is filled with ``''``.

        Raises:
            xml.etree.ElementTree.ParseError: If the input is not
                well-formed XML.
        """
        # NOTE(review): ElementTree is not hardened against maliciously
        # constructed XML; confirm inputs are trusted or use a hardened parser.
        root = ET.fromstring(in_stream)

        headers = []
        records = []

        # First pass: collect every item as a mapping and discover all column
        # names. Rows must not be built yet, because a tag first seen in a
        # later item would leave the earlier rows one column short.
        for item in root.findall('item'):
            record = {}
            for child in item:
                if child.tag not in headers:
                    headers.append(child.tag)
                record[child.tag] = child.text
            records.append(record)

        # Second pass: every row now has exactly len(headers) cells.
        dataset = tablib.Dataset()
        dataset.headers = headers
        for record in records:
            dataset.append([record.get(h, '') for h in headers])

        return dataset

    def export_data(self, dataset, **kwargs):
        """Export a dataset to XML.

        Args:
            dataset: The dataset to serialize; its headers become the
                field tags.
            **kwargs: Accepted for interface compatibility and ignored.

        Returns:
            str: A ``<data>`` document with one ``<item>`` per row.
        """
        root = ET.Element('data')

        for row in dataset:
            item = ET.SubElement(root, 'item')
            for header, value in zip(dataset.headers, row):
                # Headers are used verbatim as tag names, so they must be
                # valid XML names for the output to be well-formed.
                field = ET.SubElement(item, header)
                field.text = str(value) if value is not None else ''

        return ET.tostring(root, encoding='unicode')

341

```

342

343

### Dynamic Format Selection

344

345

```python

346

from import_export.command_utils import get_format_class

347

348

def export_with_format(resource, format_name, filename=None):
    """Export resource data with the specified format and write it to a file.

    Args:
        resource: Object whose ``export()`` returns the dataset to write.
        format_name: Format name handed to ``get_format_class``.
        filename: Optional output path; also handed to ``get_format_class``.
            When falsy, ``export.<extension>`` is used.

    Returns:
        str: The path of the file that was written.
    """
    # Get the format dynamically; the result is used as a format instance.
    export_format = get_format_class(format_name, filename)

    # Export data
    dataset = resource.export()
    export_data = export_format.export_data(dataset)

    # Determine filename if not provided
    if not filename:
        filename = f"export.{export_format.get_extension()}"

    # Write to file
    if export_format.is_binary():
        with open(filename, 'wb') as f:
            f.write(export_data)
    else:
        # newline='' stops the text layer from rewriting line endings (CSV
        # output already carries its own terminators); the explicit encoding
        # keeps the file independent of the platform default.
        with open(filename, 'w', encoding='utf-8', newline='') as f:
            f.write(export_data)

    return filename

369

370

# Usage: one call per target file
resource = BookResource()
export_with_format(resource, 'CSV', filename='books.csv')
export_with_format(resource, 'XLSX', filename='books.xlsx')

374

```

375

376

### Format-Specific Options

377

378

```python

379

class CustomCSVFormat(CSV):
    """CSV variant whose delimiter and quote character can be overridden."""

    def export_data(self, dataset, **kwargs):
        """Export ``dataset`` as CSV using the requested dialect options.

        Only ``delimiter`` (default ``','``) and ``quotechar`` (default
        ``'"'``) are read from ``kwargs``; any other option is ignored.
        """
        csv_options = {
            'delimiter': kwargs.get('delimiter', ','),
            'quotechar': kwargs.get('quotechar', '"'),
        }

        # Hand the options to tablib's CSV export
        return dataset.export('csv', **csv_options)

389

390

# Usage: semicolon-delimited, single-quoted CSV
resource = BookResource()
dataset = resource.export()
csv_format = CustomCSVFormat()
csv_data = csv_format.export_data(dataset, delimiter=';', quotechar="'")

399

```

400

401

### Format Registry Pattern

402

403

```python

404

class FormatRegistry:
    """Case-insensitive mapping from format names to format classes."""

    def __init__(self):
        # Keys are stored lower-cased so that lookups ignore case.
        self._formats = {}

    def register(self, name, format_class):
        """Store ``format_class`` under ``name``, replacing any earlier entry."""
        key = name.lower()
        self._formats[key] = format_class

    def get_format(self, name):
        """Return the class registered under ``name``, or ``None`` if absent."""
        key = name.lower()
        return self._formats.get(key)

    def list_formats(self):
        """Return the registered (lower-cased) names in insertion order."""
        return [*self._formats]

421

422

# Module-level registry shared by the calls below
format_registry = FormatRegistry()

# Built-in formats
format_registry.register('csv', CSV)
format_registry.register('xlsx', XLSX)
format_registry.register('json', JSON)
format_registry.register('yaml', YAML)

# Custom format
format_registry.register('xml', XMLFormat)

# Usage: look one format up, then list every registered name
format_class = format_registry.get_format('csv')
available_formats = format_registry.list_formats()

437

```

438

439

### Conditional Format Selection

440

441

```python

442

def choose_format_by_size(dataset, prefer_binary=True):
    """Pick a format instance according to the dimensions of ``dataset``.

    More than 10000 rows or more than 50 columns yields ``XLSX()`` (or
    ``CSV()`` when ``prefer_binary`` is false); fewer than 100 rows yields
    ``JSON()``; everything else yields ``CSV()``.
    """
    n_rows = len(dataset)
    n_cols = len(dataset.headers) if dataset.headers else 0

    # Large datasets: binary format unless the caller opted out
    is_large = n_rows > 10000 or n_cols > 50
    if is_large:
        if prefer_binary:
            return XLSX()
        return CSV()

    # Small datasets: text format
    if n_rows < 100:
        return JSON()

    # Medium datasets fall back to CSV
    return CSV()

458

459

# Usage: let the dataset's size decide the output format
resource = BookResource()
dataset = resource.export()

# The returned object is a format instance, ready to export with
format_class = choose_format_by_size(dataset)
export_data = format_class.export_data(dataset)

464

```

465

466

### Format Validation

467

468

```python

469

def validate_format_support(format_name):
    """Validate that a format is supported.

    Args:
        format_name: Format name handed to ``get_format_class``.

    Returns:
        tuple: ``(supported, message)`` where ``supported`` is a bool and
        ``message`` is a human-readable explanation. No exception escapes;
        any failure is reported through the returned tuple.
    """
    try:
        format_class = get_format_class(format_name)

        # Only formats that name a tablib module need a dependency check;
        # a missing or None TABLIB_MODULE means there is nothing to look up.
        module_name = getattr(format_class, 'TABLIB_MODULE', None)
        if module_name:
            # Accept a bare registry key ('csv') as well as a dotted module
            # path ending in '_csv' -- the latter form is an assumption.
            registry_key = module_name.rsplit('.', 1)[-1].lstrip('_')
            try:
                from tablib.exceptions import UnsupportedFormat
                from tablib.formats import registry

                # The registry stores formats by key and has no per-format
                # attributes, so they must be fetched through get_format().
                registry.get_format(registry_key)
            except (ImportError, UnsupportedFormat):
                return False, f"Format {format_name} requires additional dependencies"

        return True, f"Format {format_name} is supported"

    except Exception as e:
        # Deliberately broad: this helper reports problems, it never raises.
        return False, f"Format {format_name} is not supported: {e}"

489

490

# Usage
# The format is named as its class ('XLSX'), matching the names passed to
# get_format_class in the dynamic-selection example ('CSV', 'XLSX').
supported, message = validate_format_support('XLSX')
if supported:
    print(f"✓ {message}")
else:
    print(f"✗ {message}")

496

```

497

498

### Multi-Format Export

499

500

```python

501

def export_to_multiple_formats(resource, formats, base_filename):
    """Export a resource to multiple formats, one file per format.

    Args:
        resource: Object whose ``export()`` returns the dataset to write.
        formats: Iterable of format names handed to ``get_format_class``.
        base_filename: Output path without extension; each format's
            extension is appended to it.

    Returns:
        dict: Maps each format name to a result dict. Successful entries
        hold ``'filename'``, ``'success': True`` and ``'size'`` (bytes
        written); failed entries hold ``'filename': None``,
        ``'success': False`` and ``'error'`` (the exception text).
    """
    dataset = resource.export()
    results = {}

    for format_name in formats:
        try:
            export_format = get_format_class(format_name)
            filename = f"{base_filename}.{export_format.get_extension()}"

            export_data = export_format.export_data(dataset)

            if export_format.is_binary():
                with open(filename, 'wb') as f:
                    f.write(export_data)
                size = len(export_data)
            else:
                # newline='' keeps the exporter's own line endings intact and
                # the explicit encoding makes the output platform-independent.
                with open(filename, 'w', encoding='utf-8', newline='') as f:
                    f.write(export_data)
                # Report bytes, not characters, so 'size' means the same
                # thing for text and binary formats.
                size = len(export_data.encode('utf-8'))

            results[format_name] = {
                'filename': filename,
                'success': True,
                'size': size,
            }

        except Exception as e:
            # Best effort by design: one failing format must not abort the
            # remaining exports, so the error is recorded instead of raised.
            results[format_name] = {
                'filename': None,
                'success': False,
                'error': str(e),
            }

    return results

533

534

# Usage
# Formats are named as their classes, matching the names passed to
# get_format_class in the dynamic-selection example ('CSV', 'XLSX').
resource = BookResource()
results = export_to_multiple_formats(
    resource,
    ['CSV', 'XLSX', 'JSON'],
    'books_export',
)

for format_name, result in results.items():
    if result['success']:
        print(f"✓ {format_name}: {result['filename']} ({result['size']} bytes)")
    else:
        print(f"✗ {format_name}: {result['error']}")

547

```