Iterative JSON parser with standard Python iterator interfaces for processing large JSON data streams without loading entire documents into memory
—
Backend selection and configuration utilities for optimizing performance based on available libraries and specific requirements. ijson's multi-backend architecture allows it to automatically select the fastest available JSON parsing implementation while maintaining a consistent API.
Get a specific backend by name, useful for forcing a particular implementation or testing different backends.
def get_backend(backend):
"""
Import and return specified backend module.
Parameters:
- backend (str): Backend name ('yajl2_c', 'yajl2_cffi', 'yajl2', 'yajl', 'python')
Returns:
Backend module with parsing functions
Raises:
- ImportError: If backend is not available
"""
Usage Examples:
import ijson
# Force use of pure Python backend
try:
python_backend = ijson.get_backend('python')
items = python_backend.items(json_data, 'data.item')
for item in items:
process(item)
except ImportError:
print("Python backend not available")
# Try fastest backend first
for backend_name in ['yajl2_c', 'yajl2_cffi', 'python']:
try:
backend = ijson.get_backend(backend_name)
print(f"Using backend: {backend_name}")
break
except ImportError:
continue
Access information about the currently selected backend and available backends.
ALL_BACKENDS: tuple
"""
All supported backends in descending order of speed.
Value: ('yajl2_c', 'yajl2_cffi', 'yajl2', 'yajl', 'python')
"""
backend: object
"""
Currently selected backend instance.
Contains all parsing functions (parse, items, kvitems, etc.)
"""
backend_name: str
"""
Name of the currently loaded backend.
One of: 'yajl2_c', 'yajl2_cffi', 'yajl2', 'yajl', 'python'
"""
Usage Examples:
import ijson
# Check current backend
print(f"Current backend: {ijson.backend_name}")
print(f"Available backends: {ijson.ALL_BACKENDS}")
# Get backend capabilities
current_backend = ijson.backend
if hasattr(current_backend, 'capabilities'):
caps = current_backend.capabilities
print(f"C-style comments: {caps.c_comments}")
print(f"Multiple values: {caps.multiple_values}")
C extension using YAJL 2.x library. Provides the best performance but requires compilation during installation.
# Automatically selected if available
import ijson
print(ijson.backend_name) # 'yajl2_c'
# Or force selection
backend = ijson.get_backend('yajl2_c')
Characteristics:
CFFI-based binding to YAJL 2.x library. Good performance without requiring C compiler.
# Force CFFI backend
backend = ijson.get_backend('yajl2_cffi')
Characteristics:
ctypes-based binding to YAJL 2.x library. Moderate performance with maximum compatibility.
# Force ctypes backend
backend = ijson.get_backend('yajl2')
Characteristics:
ctypes-based binding to YAJL 1.x library. Provided for compatibility with older YAJL installations.
# Force legacy YAJL backend
backend = ijson.get_backend('yajl')
Characteristics:
Pure Python implementation. Always available but slowest performance.
# Force pure Python backend
backend = ijson.get_backend('python')
Characteristics:
Force a specific backend using environment variable:
# Force pure Python backend
export IJSON_BACKEND=python
python your_script.py
# Force fastest C backend
export IJSON_BACKEND=yajl2_c
python your_script.py
import os
import ijson
# Check if environment override is set
if 'IJSON_BACKEND' in os.environ:
print(f"Backend forced to: {os.environ['IJSON_BACKEND']}")
print(f"Using backend: {ijson.backend_name}")
Specify custom YAJL library location:
# Use custom YAJL library
export YAJL_DLL=/usr/local/lib/libyajl.so.2
python your_script.py
Each backend supports different feature sets through the capabilities system:
class BackendCapabilities:
"""
Capabilities supported by a backend.
"""
c_comments: bool # C-style comments (non-standard JSON)
multiple_values: bool # Multiple top-level values
invalid_leading_zeros_detection: bool # Leading zeros detection
incomplete_json_tokens_detection: bool # Incomplete token detection
int64: bool # 64-bit integer support with use_float=True
Usage Examples:
import ijson
# Check backend capabilities
caps = ijson.backend.capabilities
print(f"Supports C comments: {caps.c_comments}")
print(f"Supports multiple values: {caps.multiple_values}")
print(f"Detects invalid leading zeros: {caps.invalid_leading_zeros_detection}")
# Use capabilities to enable features
if caps.multiple_values:
# Parse multiple JSON values in stream
items = ijson.parse(stream, multiple_values=True)
if caps.c_comments:
# Parse JSON with C-style comments
data = '{"key": "value" /* comment */}'
result = list(ijson.items(data, ''))
Typical performance characteristics (relative to pure Python):
| Backend | Speed Multiplier | Compilation Required | Dependencies |
|---|---|---|---|
| yajl2_c | 10-20x | Yes (C compiler) | None (wheels available) |
| yajl2_cffi | 5-15x | No | CFFI, YAJL |
| yajl2 | 3-8x | No | YAJL 2.x |
| yajl | 3-8x | No | YAJL 1.x |
| python | 1x (baseline) | No | None |
import time
import ijson
def benchmark_backend(backend_name, data, iterations=1000):
    """Time repeated parsing of ``data`` with the named ijson backend.

    Parameters:
    - backend_name (str): Name passed to ``ijson.get_backend``.
    - data: JSON document handed to ``backend.items`` on every iteration.
    - iterations (int): Number of times the document is parsed.

    Returns:
    Elapsed time in seconds (float), or None if the backend cannot be imported.
    """
    # Only the backend lookup is expected to raise ImportError, so keep the
    # try body minimal; errors raised while parsing propagate to the caller
    # instead of being reported as "backend unavailable".
    try:
        backend = ijson.get_backend(backend_name)
    except ImportError:
        return None

    # perf_counter is monotonic and high-resolution; time.time can jump when
    # the system clock is adjusted, which would corrupt the measurement.
    start_time = time.perf_counter()
    for _ in range(iterations):
        # list() exhausts the lazy iterator so the whole document is parsed.
        list(backend.items(data, 'items.item'))
    return time.perf_counter() - start_time
# Benchmark all available backends
test_data = '{"items": [' + ','.join([f'{{"id": {i}}}' for i in range(100)]) + ']}'
results = {}
for backend_name in ijson.ALL_BACKENDS:
elapsed = benchmark_backend(backend_name, test_data)
if elapsed is not None:
results[backend_name] = elapsed
print(f"{backend_name}: {elapsed:.4f}s")
# Find fastest backend
if results:
fastest = min(results.keys(), key=lambda k: results[k])
print(f"Fastest available backend: {fastest}")
import ijson
from ijson.backends import YAJLImportError
try:
backend = ijson.get_backend('yajl2_c')
except ImportError as e:
print(f"C backend not available: {e}")
# Fall back to pure Python
backend = ijson.get_backend('python')
try:
backend = ijson.get_backend('yajl2')
except YAJLImportError as e:
print(f"YAJL version issue: {e}")
import ijson
# Check for specific backend features
backend = ijson.backend
if hasattr(backend, 'capabilities'):
if backend.capabilities.int64:
# Safe to use large integers with use_float=True
data = ijson.parse(source, use_float=True)
else:
# Use Decimal for precision
data = ijson.parse(source)
import ijson
import os
def get_optimal_backend():
    """Pick an ijson backend according to environment variables.

    Resolution order:
    1. ``IJSON_BACKEND``, when set, names the backend to load.
    2. ``ENVIRONMENT == 'production'`` tries the compiled backends in turn.
    3. ``ENVIRONMENT == 'development'`` loads the pure Python backend.
    4. Otherwise the backend ijson already selected is returned.
    """
    env = os.environ

    # An explicit override wins over every other consideration.
    forced_name = env.get('IJSON_BACKEND')
    if forced_name is not None:
        return ijson.get_backend(forced_name)

    deployment = env.get('ENVIRONMENT')

    # Development/debugging: the pure Python implementation.
    if deployment == 'development':
        return ijson.get_backend('python')

    # Production: first compiled backend that can be imported.
    if deployment == 'production':
        for candidate in ('yajl2_c', 'yajl2_cffi'):
            try:
                compiled = ijson.get_backend(candidate)
            except ImportError:
                continue
            else:
                return compiled

    # Nothing matched (or no compiled backend was importable).
    return ijson.backend
# Use optimal backend
optimal_backend = get_optimal_backend()
print(f"Selected backend: {optimal_backend.backend_name}")
import ijson
def select_backend_for_features(need_comments=False, need_multiple_values=False):
    """Return the first importable backend that offers the requested features.

    Backends are tried in the order given by ``ijson.ALL_BACKENDS``.

    Parameters:
    - need_comments (bool): Require ``capabilities.c_comments``.
    - need_multiple_values (bool): Require ``capabilities.multiple_values``.

    Returns:
    The first backend module that imports and satisfies every requested feature.

    Raises:
    - RuntimeError: If no importable backend supports the required features.
    """
    for backend_name in ijson.ALL_BACKENDS:
        # Only the import itself is expected to raise ImportError.
        try:
            backend = ijson.get_backend(backend_name)
        except ImportError:
            continue

        # A backend may not expose ``capabilities``; treat a missing attribute
        # as "feature not supported" rather than raising AttributeError.
        caps = getattr(backend, 'capabilities', None)
        if need_comments and not getattr(caps, 'c_comments', False):
            continue
        if need_multiple_values and not getattr(caps, 'multiple_values', False):
            continue
        return backend

    raise RuntimeError("No backend supports required features")
# Select backend that supports C-style comments
backend = select_backend_for_features(need_comments=True)
print(f"Using backend with comment support: {backend.backend_name}")
Install with Tessl CLI
npx tessl i tessl/pypi-ijson