or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

charts-visualization.mdcli.mdcollections-functions.mdconstants.mdconversion.mdcore-objects.mdindex.mdpro-features.mdudfs.mdutilities.md

conversion.mddocs/

0

# Data Conversion System

1

2

Flexible framework for converting data between Python and Excel formats, with built-in support for pandas, NumPy, and custom conversion pipelines. The conversion system handles the complex task of translating between Excel's cell-based data model and Python's rich data types.

3

4

## Capabilities

5

6

### Base Converter Framework

7

8

The conversion system is built around converter classes that handle reading from and writing to Excel ranges with configurable options.

9

10

```python { .api }

11

# Available in xlwings.conversion module

12

class Converter:

13

"""Base converter class for custom data transformations."""

14

15

def __init__(self, **options):

16

"""

17

Initialize converter with options.

18

19

Args:

20

**options: Converter-specific configuration options.

21

"""

22

23

def read_value(self, value, options):

24

"""

25

Convert Excel data to Python format.

26

27

Args:

28

value: Raw data from Excel range.

29

options: Conversion options dictionary.

30

31

Returns:

32

Converted Python data structure.

33

"""

34

35

def write_value(self, value, options):

36

"""

37

Convert Python data to Excel format.

38

39

Args:

40

value: Python data to convert.

41

options: Conversion options dictionary.

42

43

Returns:

44

Data structure suitable for Excel range.

45

"""

46

47

class RawConverter(Converter):

48

"""No conversion - pass data through unchanged."""

49

50

class DictConverter(Converter):

51

"""Convert between Excel ranges and Python dictionaries."""

52

53

class NumpyArrayConverter(Converter):

54

"""Convert between Excel ranges and NumPy arrays."""

55

56

class PandasDataFrameConverter(Converter):

57

"""Convert between Excel ranges and pandas DataFrames."""

58

59

class PandasSeriesConverter(Converter):

60

"""Convert between Excel ranges and pandas Series."""

61

62

class PolarsDataFrameConverter(Converter):

63

"""Convert between Excel ranges and Polars DataFrames (PRO)."""

64

65

class PolarsSeriesConverter(Converter):

66

"""Convert between Excel ranges and Polars Series (PRO)."""

67

68

class OrderedDictConverter(Converter):

69

"""Convert between Excel ranges and OrderedDict objects."""

70

71

class DatetimeConverter(Converter):

72

"""Convert between Excel dates and Python datetime objects."""

73

74

class DateConverter(Converter):

75

"""Convert between Excel dates and Python date objects."""

76

```

77

78

### Built-in Converters

79

80

xlwings provides several built-in converters for common data types and structures:

81

82

```python

83

import xlwings as xw

84

import numpy as np

85

import pandas as pd

86

87

# Raw converter (no conversion)

88

ws = xw.sheets.active

89

ws.range('A1:C3').options(convert=None).value = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]

90

raw_data = ws.range('A1:C3').options(convert=None).value

91

92

# NumPy array converter

93

array = np.random.rand(5, 3)

94

ws.range('A1').options(np.array).value = array

95

read_array = ws.range('A1:C5').options(np.array).value

96

97

# pandas DataFrame converter

98

df = pd.DataFrame({

99

'Name': ['Alice', 'Bob', 'Charlie'],

100

'Age': [25, 30, 35],

101

'Salary': [50000, 60000, 70000]

102

})

103

ws.range('E1').options(pd.DataFrame, header=True, index=False).value = df

104

read_df = ws.range('E1:G4').options(pd.DataFrame, header=True, index=False).value

105

106

# pandas Series converter

107

series = pd.Series([1, 2, 3, 4, 5], index=['A', 'B', 'C', 'D', 'E'])

108

ws.range('I1').options(pd.Series, header=True).value = series

109

```

110

111

### Conversion Options

112

113

The conversion system supports extensive configuration through options that control data transformation behavior:

114

115

```python { .api }

116

# Common conversion options available across converters:

117

118

# Dimensional control

119

ndim: int # Force specific number of dimensions (1 or 2)

120

transpose: bool # Transpose data orientation (swap rows/columns)

121

122

# Data type handling

123

numbers: int # How to handle numbers (0=float/int, 1=float)

124

dates: str # Date conversion ('datetime', 'date', None)

125

empty: str # Empty cell representation ('', None, NaN)

126

expand: str # Dynamic expansion ('table', 'down', 'right')

127

128

# pandas-specific options

129

header: bool # Include column headers

130

index: bool # Include row index

131

dtype: dict # Column data types

132

parse_dates: list # Columns to parse as dates

133

134

# NumPy-specific options

135

dtype: str # Array data type ('float64', 'int32', etc.)

136

order: str # Memory layout ('C', 'F')

137

```

138

139

Usage examples with options:

140

141

```python

142

import xlwings as xw

143

import pandas as pd

144

import numpy as np

145

146

ws = xw.sheets.active

147

148

# DataFrame with custom options

149

df = pd.DataFrame({

150

'Date': pd.date_range('2024-01-01', periods=10),

151

'Value': np.random.randn(10),

152

'Category': ['A', 'B'] * 5

153

})

154

155

# Write with headers but no index

156

ws.range('A1').options(pd.DataFrame, header=True, index=False).value = df

157

158

# Read with specific data types

159

df_back = ws.range('A1:C11').options(

160

pd.DataFrame,

161

header=True,

162

index=False,

163

dtype={'Category': 'category'},

164

parse_dates=['Date']

165

).value

166

167

# NumPy array with specific dtype

168

arr = np.random.randint(0, 100, (5, 5))

169

ws.range('F1').options(np.array, dtype='int32').value = arr

170

171

# Transposed data

172

ws.range('L1').options(transpose=True).value = [[1, 2, 3], [4, 5, 6]]

173

174

# Dynamic expansion

175

ws.range('P1').options(expand='table').value = df

176

```

177

178

### Custom Converters

179

180

Create custom converters for specialized data transformations:

181

182

```python

183

import xlwings as xw

184

from xlwings.conversion import Converter

185

186

class JSONConverter(Converter):

187

"""Convert between Excel ranges and JSON strings."""

188

189

def read_value(self, value, options):

190

import json

191

if isinstance(value, str):

192

try:

193

return json.loads(value)

194

except json.JSONDecodeError:

195

return value

196

return value

197

198

def write_value(self, value, options):

199

import json

200

if isinstance(value, (dict, list)):

201

return json.dumps(value, indent=2)

202

return value

203

204

class DateTimeRangeConverter(Converter):

205

"""Convert date ranges to formatted strings."""

206

207

def read_value(self, value, options):

208

from datetime import datetime

209

if isinstance(value, list) and len(value) == 2:

210

start, end = value

211

if isinstance(start, datetime) and isinstance(end, datetime):

212

return f"{start.strftime('%Y-%m-%d')} to {end.strftime('%Y-%m-%d')}"

213

return value

214

215

def write_value(self, value, options):

216

if isinstance(value, str) and ' to ' in value:

217

from datetime import datetime

218

start_str, end_str = value.split(' to ')

219

start = datetime.strptime(start_str.strip(), '%Y-%m-%d')

220

end = datetime.strptime(end_str.strip(), '%Y-%m-%d')

221

return [start, end]

222

return value

223

224

# Register and use custom converters

225

ws = xw.sheets.active

226

227

# Use JSON converter

228

json_data = {"name": "John", "age": 30, "city": "New York"}

229

ws.range('A1').options(JSONConverter()).value = json_data

230

read_json = ws.range('A1').options(JSONConverter()).value

231

232

# Use date range converter

233

date_range = "2024-01-01 to 2024-12-31"

234

ws.range('C1:D1').options(DateTimeRangeConverter()).value = date_range

235

```

236

237

### Advanced Conversion Patterns

238

239

Complex conversion scenarios and patterns for handling specialized data:

240

241

```python

242

import xlwings as xw

243

import pandas as pd

244

import numpy as np

245

246

# Pattern 1: Multi-sheet DataFrame operations

247

def write_dataframe_sheets(dataframes_dict, workbook):

248

"""Write multiple DataFrames to separate sheets."""

249

for sheet_name, df in dataframes_dict.items():

250

if sheet_name in [s.name for s in workbook.sheets]:

251

ws = workbook.sheets[sheet_name]

252

else:

253

ws = workbook.sheets.add(sheet_name)

254

255

ws.range('A1').options(

256

pd.DataFrame,

257

header=True,

258

index=False

259

).value = df

260

261

# Pattern 2: Incremental data loading

262

def append_dataframe_data(df, sheet, start_row=None):

263

"""Append DataFrame to existing Excel data."""

264

if start_row is None:

265

# Find last row with data

266

last_row = sheet.used_range.last_cell.row

267

start_row = last_row + 1

268

269

# Write data without headers

270

sheet.range(f'A{start_row}').options(

271

pd.DataFrame,

272

header=False,

273

index=False

274

).value = df

275

276

# Pattern 3: Chunked data processing

277

def process_large_dataset(sheet, chunk_size=1000):

278

"""Process large Excel dataset in chunks."""

279

used_range = sheet.used_range

280

total_rows = used_range.shape[0]

281

282

results = []

283

for start_row in range(1, total_rows + 1, chunk_size):

284

end_row = min(start_row + chunk_size - 1, total_rows)

285

286

# Read chunk

287

chunk_range = sheet.range(f'A{start_row}:Z{end_row}')

288

chunk_df = chunk_range.options(pd.DataFrame).value

289

290

# Process chunk

291

processed = chunk_df.apply(lambda x: x * 2) # Example operation

292

results.append(processed)

293

294

return pd.concat(results, ignore_index=True)

295

296

# Pattern 4: Mixed data type handling

297

def handle_mixed_data_types(range_obj):

298

"""Handle ranges with mixed data types."""

299

raw_data = range_obj.options(convert=None).value

300

301

processed_data = []

302

for row in raw_data:

303

processed_row = []

304

for cell in row:

305

if isinstance(cell, str):

306

# Try to parse as number

307

try:

308

processed_row.append(float(cell))

309

except ValueError:

310

processed_row.append(cell)

311

elif cell is None:

312

processed_row.append(np.nan)

313

else:

314

processed_row.append(cell)

315

processed_data.append(processed_row)

316

317

return processed_data

318

```

319

320

### Performance Optimization

321

322

Conversion best practices for optimal performance with large datasets:

323

324

```python

325

import xlwings as xw

326

import pandas as pd

327

import numpy as np

328

329

# Best practice 1: Batch operations

330

def efficient_multi_range_read(sheet, ranges):

331

"""Read multiple ranges efficiently."""

332

# Read all ranges in one operation when possible

333

data = {}

334

for name, address in ranges.items():

335

data[name] = sheet.range(address).options(np.array).value

336

return data

337

338

# Best practice 2: Minimize Excel interactions

339

def efficient_dataframe_write(df, sheet, start_cell='A1'):

340

"""Efficiently write DataFrame with minimal Excel calls."""

341

# Single write operation instead of cell-by-cell

342

sheet.range(start_cell).options(

343

pd.DataFrame,

344

header=True,

345

index=False,

346

expand='table'

347

).value = df

348

349

# Best practice 3: Use appropriate converters

350

def choose_optimal_converter(data):

351

"""Choose the most efficient converter for data type."""

352

if isinstance(data, pd.DataFrame):

353

return pd.DataFrame

354

elif isinstance(data, np.ndarray):

355

return np.array

356

elif isinstance(data, list):

357

return None # Raw converter for lists

358

else:

359

return None

360

361

# Best practice 4: Memory-efficient large data handling

362

def memory_efficient_processing(sheet, output_sheet):

363

"""Process large data without loading everything into memory."""

364

chunk_size = 10000

365

used_range = sheet.used_range

366

total_rows = used_range.shape[0]

367

368

for i in range(0, total_rows, chunk_size):

369

# Process in chunks to manage memory

370

start_row = i + 1

371

end_row = min(i + chunk_size, total_rows)

372

373

chunk = sheet.range(f'A{start_row}:Z{end_row}').options(np.array).value

374

processed_chunk = np.square(chunk) # Example processing

375

376

# Write processed chunk

377

output_sheet.range(f'A{start_row}').options(np.array).value = processed_chunk

378

```

379

380

## Types

381

382

```python { .api }

383

# Converter type definitions

384

ConverterClass = type[Converter]

385

ConversionOptions = dict[str, Any]

386

387

# Built-in converter types

388

RawConverterType = type[RawConverter]

389

DictConverterType = type[DictConverter]

390

NumpyConverterType = type[NumpyArrayConverter]

391

PandasDFConverterType = type[PandasDataFrameConverter]

392

PandasSeriesConverterType = type[PandasSeriesConverter]

393

394

# Option type definitions

395

ConvertOption = Union[ConverterClass, None]

396

TransposeOption = bool

397

NDimOption = Literal[1, 2]

398

NumbersOption = Literal[0, 1]

399

DatesOption = Union[Literal['datetime', 'date'], None]

400

EmptyOption = Union[str, None, float]

401

ExpandOption = Literal['table', 'down', 'right']

402

```