Iterative JSON parser with standard Python iterator interfaces for processing large JSON data streams without loading entire documents into memory
—
Core parsing functions that provide the most common and convenient ways to extract data from JSON streams. These functions handle JSON parsing at the object and key-value level, abstracting away low-level parsing details.
Extracts complete Python objects from JSON streams under a specified prefix path. This is the most commonly used function for processing JSON arrays and nested objects.
def items(source, prefix, map_type=None, buf_size=64*1024, **config):
"""
Yield complete Python objects found under specified prefix.
Parameters:
- source: File-like object, string, bytes, or iterable containing JSON data
- prefix (str): JSON path prefix targeting the objects to extract
- map_type (type, optional): Custom mapping type for objects (default: dict)
- buf_size (int): Buffer size for reading file data (default: 64*1024)
- **config: Backend-specific configuration options
Returns:
Generator yielding Python objects (dict, list, str, int, float, bool, None)
Raises:
- JSONError: For malformed JSON
- IncompleteJSONError: For truncated JSON data
"""
Usage Examples:
import ijson
# Extract array items
json_data = '{"products": [{"id": 1, "name": "Laptop"}, {"id": 2, "name": "Phone"}]}'
products = ijson.items(json_data, 'products.item')
for product in products:
print(f"Product {product['id']}: {product['name']}")
# Extract nested objects
json_data = '{"data": {"users": {"alice": {"age": 30}, "bob": {"age": 25}}}}'
user_data = ijson.items(json_data, 'data.users')
for users_dict in user_data:
for name, info in users_dict.items():
print(f"{name}: {info['age']} years old")
# Process large JSON files
with open('large_dataset.json', 'rb') as file:
records = ijson.items(file, 'records.item')
for record in records:
process_record(record)
Extracts key-value pairs from JSON objects under a specified prefix. Useful when you need to iterate over object properties without loading the entire object into memory.
def kvitems(source, prefix, map_type=None, buf_size=64*1024, **config):
"""
Yield (key, value) pairs from JSON objects under prefix.
Parameters:
- source: File-like object, string, bytes, or iterable containing JSON data
- prefix (str): JSON path prefix targeting the objects to extract pairs from
- map_type (type, optional): Custom mapping type for nested objects (default: dict)
- buf_size (int): Buffer size for reading file data (default: 64*1024)
- **config: Backend-specific configuration options
Returns:
Generator yielding (key, value) tuples where key is str and value is Python object
Raises:
- JSONError: For malformed JSON
- IncompleteJSONError: For truncated JSON data
"""
Usage Examples:
import ijson
# Extract configuration key-value pairs
json_data = '{"config": {"debug": true, "timeout": 30, "max_retries": 3}}'
config_items = ijson.kvitems(json_data, 'config')
for key, value in config_items:
print(f"Config {key}: {value}")
# Process object properties from large files
with open('settings.json', 'rb') as file:
settings = ijson.kvitems(file, 'application.settings')
for setting_name, setting_value in settings:
apply_setting(setting_name, setting_value)
Provides parsing events with full path context, giving you complete control over JSON processing while maintaining memory efficiency.
def parse(source, buf_size=64*1024, **config):
"""
Yield (prefix, event, value) tuples with path context.
Parameters:
- source: File-like object, string, bytes, or iterable containing JSON data
- buf_size (int): Buffer size for reading file data (default: 64*1024)
- **config: Backend-specific configuration options
Returns:
Generator yielding (prefix, event, value) tuples where:
- prefix (str): JSON path to current location
- event (str): Event type ('null', 'boolean', 'number', 'string', 'map_key', 'start_map', 'end_map', 'start_array', 'end_array')
- value: Event value (varies by event type)
Raises:
- JSONError: For malformed JSON
- IncompleteJSONError: For truncated JSON data
"""
Usage Examples:
import ijson
json_data = '{"users": [{"name": "Alice", "active": true}, {"name": "Bob", "active": false}]}'
for prefix, event, value in ijson.parse(json_data):
if event == 'string' and prefix.endswith('.name'):
print(f"Found user name: {value}")
elif event == 'boolean' and prefix.endswith('.active'):
print(f"Active status: {value}")
Provides the lowest-level parsing interface, yielding raw JSON events without path context. Most efficient for custom parsing logic that doesn't need path information.
def basic_parse(source, buf_size=64*1024, **config):
"""
Yield low-level (event, value) parsing events.
Parameters:
- source: File-like object, string, bytes, or iterable containing JSON data
- buf_size (int): Buffer size for reading file data (default: 64*1024)
- **config: Backend-specific configuration options
Returns:
Generator yielding (event, value) tuples where:
- event (str): Event type ('null', 'boolean', 'number', 'string', 'map_key', 'start_map', 'end_map', 'start_array', 'end_array')
- value: Event value (None for structural events, actual value for data events)
Raises:
- JSONError: For malformed JSON
- IncompleteJSONError: For truncated JSON data
"""
Usage Examples:
import ijson
from ijson.common import ObjectBuilder
# Build custom objects from events
json_data = '{"name": "Alice", "age": 30, "active": true}'
builder = ObjectBuilder()
for event, value in ijson.basic_parse(json_data):
builder.event(event, value)
result = builder.value
print(result) # {'name': 'Alice', 'age': 30, 'active': True}
# Custom event processing
for event, value in ijson.basic_parse(json_data):
if event == 'string':
print(f"String value: {value}")
elif event == 'number':
print(f"Number value: {value}")
All parsing functions accept multiple input source types:
- File objects opened with open() in binary or text mode
- Objects with an async read() method (requires async variants)
import ijson
from ijson.common import JSONError, IncompleteJSONError
try:
data = ijson.items(malformed_json, 'data.item')
for item in data:
process(item)
except IncompleteJSONError:
print("JSON data was truncated or incomplete")
except JSONError as e:
print(f"JSON parsing error: {e}")
Utility functions for converting JSON number strings to Python numeric types.
def integer_or_decimal(str_value):
"""
Convert string to int or Decimal for precision.
Parameters:
- str_value (str): String representation of a number
Returns:
int or decimal.Decimal: Parsed number value
"""
def integer_or_float(str_value):
"""
Convert string to int or float.
Parameters:
- str_value (str): String representation of a number
Returns:
int or float: Parsed number value
"""
def number(str_value):
"""
DEPRECATED: Convert string to int or Decimal.
Use integer_or_decimal() instead.
Parameters:
- str_value (str): String representation of a number
Returns:
int or decimal.Decimal: Parsed number value
Warns:
DeprecationWarning: Function will be removed in future release
"""
Usage Examples:
from ijson.common import integer_or_decimal, integer_or_float
# Convert JSON number strings
result1 = integer_or_decimal("42") # int(42)
result2 = integer_or_decimal("3.14159") # Decimal('3.14159')
result3 = integer_or_float("42") # int(42)
result4 = integer_or_float("3.14159") # float(3.14159)
- Tune buf_size for better performance with large files
Install with Tessl CLI
npx tessl i tessl/pypi-ijson