0
# Data Conversion System
1
2
Flexible framework for converting data between Python and Excel formats, with built-in support for pandas, NumPy, and custom conversion pipelines. The conversion system handles the complex task of translating between Excel's cell-based data model and Python's rich data types.
3
4
## Capabilities
5
6
### Base Converter Framework
7
8
The conversion system is built around converter classes that handle reading from and writing to Excel ranges with configurable options.
9
10
```python { .api }
11
# Available in xlwings.conversion module
12
class Converter:
13
"""Base converter class for custom data transformations."""
14
15
def __init__(self, **options):
16
"""
17
Initialize converter with options.
18
19
Args:
20
**options: Converter-specific configuration options.
21
"""
22
23
def read_value(self, value, options):
24
"""
25
Convert Excel data to Python format.
26
27
Args:
28
value: Raw data from Excel range.
29
options: Conversion options dictionary.
30
31
Returns:
32
Converted Python data structure.
33
"""
34
35
def write_value(self, value, options):
36
"""
37
Convert Python data to Excel format.
38
39
Args:
40
value: Python data to convert.
41
options: Conversion options dictionary.
42
43
Returns:
44
Data structure suitable for Excel range.
45
"""
46
47
class RawConverter(Converter):
48
"""No conversion - pass data through unchanged."""
49
50
class DictConverter(Converter):
51
"""Convert between Excel ranges and Python dictionaries."""
52
53
class NumpyArrayConverter(Converter):
54
"""Convert between Excel ranges and NumPy arrays."""
55
56
class PandasDataFrameConverter(Converter):
57
"""Convert between Excel ranges and pandas DataFrames."""
58
59
class PandasSeriesConverter(Converter):
60
"""Convert between Excel ranges and pandas Series."""
61
62
class PolarsDataFrameConverter(Converter):
63
"""Convert between Excel ranges and Polars DataFrames (PRO)."""
64
65
class PolarsSeriesConverter(Converter):
66
"""Convert between Excel ranges and Polars Series (PRO)."""
67
68
class OrderedDictConverter(Converter):
69
"""Convert between Excel ranges and OrderedDict objects."""
70
71
class DatetimeConverter(Converter):
72
"""Convert between Excel dates and Python datetime objects."""
73
74
class DateConverter(Converter):
75
"""Convert between Excel dates and Python date objects."""
76
```
77
78
### Built-in Converters
79
80
xlwings provides several built-in converters for common data types and structures:
81
82
```python
83
import xlwings as xw
84
import numpy as np
85
import pandas as pd
86
87
# Raw converter (no conversion)
88
ws = xw.sheets.active
89
ws.range('A1:C3').options(convert=None).value = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
90
raw_data = ws.range('A1:C3').options(convert=None).value
91
92
# NumPy array converter
93
array = np.random.rand(5, 3)
94
ws.range('A1').options(np.array).value = array
95
read_array = ws.range('A1:C5').options(np.array).value
96
97
# pandas DataFrame converter
98
df = pd.DataFrame({
99
'Name': ['Alice', 'Bob', 'Charlie'],
100
'Age': [25, 30, 35],
101
'Salary': [50000, 60000, 70000]
102
})
103
ws.range('E1').options(pd.DataFrame, header=True, index=False).value = df
104
read_df = ws.range('E1:G4').options(pd.DataFrame, header=True, index=False).value
105
106
# pandas Series converter
107
series = pd.Series([1, 2, 3, 4, 5], index=['A', 'B', 'C', 'D', 'E'])
108
ws.range('I1').options(pd.Series, header=True).value = series
109
```
110
111
### Conversion Options
112
113
The conversion system supports extensive configuration through options that control data transformation behavior:
114
115
```python { .api }
116
# Common conversion options available across converters:
117
118
# Dimensional control
119
ndim: int # Force specific number of dimensions (1 or 2)
120
transpose: bool # Transpose data orientation (swap rows/columns)
121
122
# Data type handling
123
numbers: int # How to handle numbers (0=float/int, 1=float)
124
dates: str # Date conversion ('datetime', 'date', None)
125
empty: str # Empty cell representation ('', None, NaN)
126
expand: str # Dynamic expansion ('table', 'down', 'right')
127
128
# pandas-specific options
129
header: bool # Include column headers
130
index: bool # Include row index
131
dtype: dict # Column data types
132
parse_dates: list # Columns to parse as dates
133
134
# NumPy-specific options
135
dtype: str # Array data type ('float64', 'int32', etc.)
136
order: str # Memory layout ('C', 'F')
137
```
138
139
Usage examples with options:
140
141
```python
142
import xlwings as xw
143
import pandas as pd
144
import numpy as np
145
146
ws = xw.sheets.active
147
148
# DataFrame with custom options
149
df = pd.DataFrame({
150
'Date': pd.date_range('2024-01-01', periods=10),
151
'Value': np.random.randn(10),
152
'Category': ['A', 'B'] * 5
153
})
154
155
# Write with headers but no index
156
ws.range('A1').options(pd.DataFrame, header=True, index=False).value = df
157
158
# Read with specific data types
159
df_back = ws.range('A1:C11').options(
160
pd.DataFrame,
161
header=True,
162
index=False,
163
dtype={'Category': 'category'},
164
parse_dates=['Date']
165
).value
166
167
# NumPy array with specific dtype
168
arr = np.random.randint(0, 100, (5, 5))
169
ws.range('F1').options(np.array, dtype='int32').value = arr
170
171
# Transposed data
172
ws.range('L1').options(transpose=True).value = [[1, 2, 3], [4, 5, 6]]
173
174
# Dynamic expansion
175
ws.range('P1').options(expand='table').value = df
176
```
177
178
### Custom Converters
179
180
Create custom converters for specialized data transformations:
181
182
```python
183
import xlwings as xw
184
from xlwings.conversion import Converter
185
186
class JSONConverter(Converter):
187
"""Convert between Excel ranges and JSON strings."""
188
189
def read_value(self, value, options):
190
import json
191
if isinstance(value, str):
192
try:
193
return json.loads(value)
194
except json.JSONDecodeError:
195
return value
196
return value
197
198
def write_value(self, value, options):
199
import json
200
if isinstance(value, (dict, list)):
201
return json.dumps(value, indent=2)
202
return value
203
204
class DateTimeRangeConverter(Converter):
205
"""Convert date ranges to formatted strings."""
206
207
def read_value(self, value, options):
208
from datetime import datetime
209
if isinstance(value, list) and len(value) == 2:
210
start, end = value
211
if isinstance(start, datetime) and isinstance(end, datetime):
212
return f"{start.strftime('%Y-%m-%d')} to {end.strftime('%Y-%m-%d')}"
213
return value
214
215
def write_value(self, value, options):
216
if isinstance(value, str) and ' to ' in value:
217
from datetime import datetime
218
start_str, end_str = value.split(' to ')
219
start = datetime.strptime(start_str.strip(), '%Y-%m-%d')
220
end = datetime.strptime(end_str.strip(), '%Y-%m-%d')
221
return [start, end]
222
return value
223
224
# Register and use custom converters
225
ws = xw.sheets.active
226
227
# Use JSON converter
228
json_data = {"name": "John", "age": 30, "city": "New York"}
229
ws.range('A1').options(JSONConverter()).value = json_data
230
read_json = ws.range('A1').options(JSONConverter()).value
231
232
# Use date range converter
233
date_range = "2024-01-01 to 2024-12-31"
234
ws.range('C1:D1').options(DateTimeRangeConverter()).value = date_range
235
```
236
237
### Advanced Conversion Patterns
238
239
Complex conversion scenarios and patterns for handling specialized data:
240
241
```python
242
import xlwings as xw
243
import pandas as pd
244
import numpy as np
245
246
# Pattern 1: Multi-sheet DataFrame operations
247
def write_dataframe_sheets(dataframes_dict, workbook):
248
"""Write multiple DataFrames to separate sheets."""
249
for sheet_name, df in dataframes_dict.items():
250
if sheet_name in [s.name for s in workbook.sheets]:
251
ws = workbook.sheets[sheet_name]
252
else:
253
ws = workbook.sheets.add(sheet_name)
254
255
ws.range('A1').options(
256
pd.DataFrame,
257
header=True,
258
index=False
259
).value = df
260
261
# Pattern 2: Incremental data loading
262
def append_dataframe_data(df, sheet, start_row=None):
263
"""Append DataFrame to existing Excel data."""
264
if start_row is None:
265
# Find last row with data
266
last_row = sheet.used_range.last_cell.row
267
start_row = last_row + 1
268
269
# Write data without headers
270
sheet.range(f'A{start_row}').options(
271
pd.DataFrame,
272
header=False,
273
index=False
274
).value = df
275
276
# Pattern 3: Chunked data processing
277
def process_large_dataset(sheet, chunk_size=1000):
278
"""Process large Excel dataset in chunks."""
279
used_range = sheet.used_range
280
total_rows = used_range.shape[0]
281
282
results = []
283
for start_row in range(1, total_rows + 1, chunk_size):
284
end_row = min(start_row + chunk_size - 1, total_rows)
285
286
# Read chunk
287
chunk_range = sheet.range(f'A{start_row}:Z{end_row}')
288
chunk_df = chunk_range.options(pd.DataFrame).value
289
290
# Process chunk
291
processed = chunk_df.apply(lambda x: x * 2) # Example operation
292
results.append(processed)
293
294
return pd.concat(results, ignore_index=True)
295
296
# Pattern 4: Mixed data type handling
297
def handle_mixed_data_types(range_obj):
298
"""Handle ranges with mixed data types."""
299
raw_data = range_obj.options(convert=None).value
300
301
processed_data = []
302
for row in raw_data:
303
processed_row = []
304
for cell in row:
305
if isinstance(cell, str):
306
# Try to parse as number
307
try:
308
processed_row.append(float(cell))
309
except ValueError:
310
processed_row.append(cell)
311
elif cell is None:
312
processed_row.append(np.nan)
313
else:
314
processed_row.append(cell)
315
processed_data.append(processed_row)
316
317
return processed_data
318
```
319
320
### Performance Optimization
321
322
Conversion best practices for optimal performance with large datasets:
323
324
```python
325
import xlwings as xw
326
import pandas as pd
327
import numpy as np
328
329
# Best practice 1: Batch operations
330
def efficient_multi_range_read(sheet, ranges):
331
"""Read multiple ranges efficiently."""
332
# Read all ranges in one operation when possible
333
data = {}
334
for name, address in ranges.items():
335
data[name] = sheet.range(address).options(np.array).value
336
return data
337
338
# Best practice 2: Minimize Excel interactions
339
def efficient_dataframe_write(df, sheet, start_cell='A1'):
340
"""Efficiently write DataFrame with minimal Excel calls."""
341
# Single write operation instead of cell-by-cell
342
sheet.range(start_cell).options(
343
pd.DataFrame,
344
header=True,
345
index=False,
346
expand='table'
347
).value = df
348
349
# Best practice 3: Use appropriate converters
350
def choose_optimal_converter(data):
351
"""Choose the most efficient converter for data type."""
352
if isinstance(data, pd.DataFrame):
353
return pd.DataFrame
354
elif isinstance(data, np.ndarray):
355
return np.array
356
elif isinstance(data, list):
357
return None # Raw converter for lists
358
else:
359
return None
360
361
# Best practice 4: Memory-efficient large data handling
362
def memory_efficient_processing(sheet, output_sheet):
363
"""Process large data without loading everything into memory."""
364
chunk_size = 10000
365
used_range = sheet.used_range
366
total_rows = used_range.shape[0]
367
368
for i in range(0, total_rows, chunk_size):
369
# Process in chunks to manage memory
370
start_row = i + 1
371
end_row = min(i + chunk_size, total_rows)
372
373
chunk = sheet.range(f'A{start_row}:Z{end_row}').options(np.array).value
374
processed_chunk = np.square(chunk) # Example processing
375
376
# Write processed chunk
377
output_sheet.range(f'A{start_row}').options(np.array).value = processed_chunk
378
```
379
380
## Types
381
382
```python { .api }
383
# Converter type definitions
384
ConverterClass = type[Converter]
385
ConversionOptions = dict[str, Any]
386
387
# Built-in converter types
388
RawConverterType = type[RawConverter]
389
DictConverterType = type[DictConverter]
390
NumpyConverterType = type[NumpyArrayConverter]
391
PandasDFConverterType = type[PandasDataFrameConverter]
392
PandasSeriesConverterType = type[PandasSeriesConverter]
393
394
# Option type definitions
395
ConvertOption = Union[ConverterClass, None]
396
TransposeOption = bool
397
NDimOption = Literal[1, 2]
398
NumbersOption = Literal[0, 1]
399
DatesOption = Union[Literal['datetime', 'date'], None]
400
EmptyOption = Union[str, None, float]
401
ExpandOption = Literal['table', 'down', 'right']
402
```