A wrapper library to read, manipulate and write data in xls format, supporting xlsx and xlsm formats
npx @tessl/cli install tessl/pypi-pyexcel-xls@0.7.0A Python wrapper library for reading, manipulating, and writing data in Excel XLS format, with support for XLSX and XLSM formats depending on the xlrd version. Built as both a standalone library and a plugin for the pyexcel ecosystem, providing seamless Excel file operations with advanced features like pagination, merged cell detection, and hidden row/column handling.
pip install pyexcel-xlsfrom pyexcel_xls import get_data, save_dataFor internal I/O functionality (re-exported from pyexcel-io):
from pyexcel_xls import read_data, write_data, isstreamFor version checking:
from pyexcel_xls import XLRD_VERSION_2_OR_ABOVE, supported_file_formatsFor utility functions:
from pyexcel_xls.xlsr import xldate_to_python_datefrom pyexcel_xls import get_data, save_data
from collections import OrderedDict
# Read data from an Excel file
data = get_data("input.xls")
print(data) # {"Sheet1": [[1, 2, 3], [4, 5, 6]], "Sheet2": [["a", "b", "c"]]}
# Write data to an Excel file
output_data = OrderedDict([
("Sheet 1", [[1, 2, 3], [4, 5, 6]]),
("Sheet 2", [["row 1", "row 2", "row 3"]])
])
save_data("output.xls", output_data)
# Work with memory streams
from io import BytesIO
# Write to memory
stream = BytesIO()
save_data(stream, output_data)
# Read from memory
stream.seek(0)
memory_data = get_data(stream)pyexcel-xls operates as a bridge between Excel files and Python data structures through a plugin-based architecture:
XLSReader, XLSInFile, XLSInMemory, XLSInContent handle different input sourcesXLSWriter, XLSheetWriter handle XLS file creation using xlwtXLSheet provides unified access to worksheet data with optional featuresThe architecture supports both standalone usage and integration with the broader pyexcel ecosystem, allowing seamless data exchange between Excel files and Python applications.
Read data from XLS, XLSX, and XLSM files (format support depends on xlrd version).
def get_data(afile, file_type=None, **keywords):
"""
Standalone function for reading Excel files.
Parameters:
- afile: str | file-like | bytes - File path, file object, or binary content
- file_type: str | None - File type override (defaults to 'xls' for streams)
- **keywords: dict - Additional options for controlling read behavior
Keyword Arguments:
- start_row: int - Starting row index for pagination (0-based)
- row_limit: int - Maximum number of rows to read
- start_column: int - Starting column index for pagination (0-based)
- column_limit: int - Maximum number of columns to read
- skip_hidden_row_and_column: bool - Skip hidden rows/columns (default: True, XLS only)
- detect_merged_cells: bool - Detect and handle merged cells (default: False)
- skip_hidden_sheets: bool - Skip hidden worksheets (default: True)
- filename: str - For xlrd file opening
- logfile: file-like - For xlrd logging
- verbosity: int - xlrd verbosity level
- use_mmap: bool - Use memory mapping for xlrd
- file_contents: bytes - Direct file content for xlrd
- encoding_override: str - Character encoding override for xlrd
- formatting_info: bool - Preserve formatting information (xlrd)
- on_demand: bool - Load sheets on demand (xlrd)
- ragged_rows: bool - Handle ragged rows (xlrd)
- auto_detect_int: bool - Convert floats without decimals to integers (default: True)
Returns:
dict - Dictionary with sheet names as keys and sheet data as 2D lists
"""Write data to XLS files with automatic formatting for date/time values.
def save_data(afile, data, file_type=None, **keywords):
"""
Standalone function for writing Excel files.
Parameters:
- afile: str | file-like - File path or file object to write to
- data: dict - Dictionary with sheet names as keys and 2D list data as values
- file_type: str | None - File type override (defaults to 'xls' for streams)
- **keywords: dict - Additional options for controlling write behavior
Keyword Arguments:
- encoding: str - Character encoding for XLS files (default: 'ascii')
- style_compression: int - Style compression level (default: 2)
Returns:
None
"""Direct access to pyexcel-io functionality for advanced use cases.
def read_data(afile, file_type=None, **keywords):
"""
Low-level reading function (re-exported from pyexcel_io.io.get_data).
Identical to get_data() but provided for consistency with pyexcel-io API.
Parameters and returns: Same as get_data()
"""
def write_data(afile, data, file_type=None, **keywords):
"""
Low-level writing function (re-exported from pyexcel_io.io.save_data).
Identical to save_data() but provided for consistency with pyexcel-io API.
Parameters and returns: Same as save_data()
"""
def isstream(afile):
"""
Check if an object is a stream.
Parameters:
- afile: any - Object to check
Returns:
bool - True if object is a stream
"""Access version-dependent format support and compatibility information.
XLRD_VERSION_2_OR_ABOVE: bool
# Boolean indicating if xlrd version 2.0 or greater is installed
supported_file_formats: list[str]
# List of supported file formats, varies by xlrd version
# ["xls", "xlsx", "xlsm"] for xlrd < 2.0
# ["xls"] for xlrd >= 2.0
def xlrd_version_2_or_greater():
"""
Check if xlrd version 2.0 or greater is installed.
Returns:
bool - True if xlrd version is 2.0 or greater
"""Read large files in chunks to manage memory usage:
# Read specific row range
partial_data = get_data("large_file.xls", start_row=10, row_limit=50)
# Read specific column range
column_data = get_data("large_file.xls", start_column=2, column_limit=5)
# Combine row and column ranges
subset_data = get_data("large_file.xls",
start_row=10, row_limit=20,
start_column=2, column_limit=8)Handle merged cells by spreading values across all merged cells:
# Enable merged cell detection (may impact performance)
data = get_data("file_with_merged_cells.xls", detect_merged_cells=True)Control visibility of hidden sheets, rows, and columns:
# Include hidden sheets in output
data = get_data("file.xls", skip_hidden_sheets=False)
# Include hidden rows and columns (XLS only)
data = get_data("file.xls", skip_hidden_row_and_column=False)When installed, pyexcel-xls automatically registers as a plugin for pyexcel:
import pyexcel as pe
# Read through pyexcel interface
book = pe.get_book(file_name="data.xls")
sheet = pe.get_sheet(file_name="data.xls")
# Write through pyexcel interface
sheet.save_as("output.xls")Additional utility functions for date handling and type conversion.
def xldate_to_python_date(value, date_mode):
"""
Convert Excel date value to Python date/time object.
Parameters:
- value: float - Excel date serial number
- date_mode: int - Date mode (0 for 1900 system, 1 for 1904 system)
Returns:
datetime.date | datetime.time | datetime.datetime - Converted date/time object
"""# Data structure for Excel workbooks
ExcelData = dict[str, list[list[any]]]
# Dictionary mapping sheet names to 2D lists of cell values
# Supported file formats (varies by xlrd version)
SupportedFormats = list[str] # ["xls", "xlsx", "xlsm"] or ["xls"]
# File input types
FileInput = str | BytesIO | BinaryIO | bytes
# File path, file-like object, or binary content
# Sheet data structure
SheetData = list[list[any]]
# 2D list representing rows and columns of cell values
# Date mode constants (for xldate_to_python_date)
DateMode = int # 0 for 1900 system, 1 for 1904 system
# Cell value types that may be encountered
CellValue = str | int | float | datetime.date | datetime.time | datetime.datetime | bool | None
# Individual cell values after type conversion
# xlrd keyword parameters for advanced reading
XlrdKeywords = dict[str, any]
# Valid keys: filename, logfile, verbosity, use_mmap, file_contents,
# encoding_override, formatting_info, on_demand, ragged_rows