Toolkit for Python-based database access that makes reading and writing data in databases as simple as working with JSON files
—
Complete table manipulation including CRUD operations, schema management, indexing, and advanced querying capabilities. The Table class provides automatic column creation, type inference, and flexible data operations.
Insert single or multiple rows with automatic schema creation and type inference.
class Table:
def insert(self, row, ensure=None, types=None):
"""
Add a row dict by inserting it into the table.
Parameters:
- row: dict, data to insert
- ensure: bool, create missing columns automatically (default: db.ensure_schema)
- types: dict, SQLAlchemy types for columns {column_name: type}
Returns:
Primary key of inserted row or True
"""
def insert_ignore(self, row, keys, ensure=None, types=None):
"""
Add a row dict into the table if the row does not exist.
Parameters:
- row: dict, data to insert
- keys: list, columns to check for existing rows
- ensure: bool, create missing columns automatically
- types: dict, SQLAlchemy types for columns
Returns:
Primary key of inserted row or False if exists
"""
def insert_many(self, rows, chunk_size=1000, ensure=None, types=None):
"""
Add many rows at a time.
Parameters:
- rows: list of dict, data to insert
- chunk_size: int, number of rows per batch (default 1000)
- ensure: bool, create missing columns automatically
- types: dict, SQLAlchemy types for columns
"""Update existing records with flexible filtering and bulk operations.
class Table:
def update(self, row, keys, ensure=None, types=None, return_count=False):
"""
Update rows in the table.
Parameters:
- row: dict, data values to update
- keys: list, column names to use as filters
- ensure: bool, create missing columns automatically
- types: dict, SQLAlchemy types for columns
- return_count: bool, return count of affected rows
Returns:
Number of updated rows (if supported by database)
"""
def update_many(self, rows, keys, chunk_size=1000, ensure=None, types=None):
"""
Update many rows in the table at a time.
Parameters:
- rows: list of dict, data to update
- keys: list, column names to use as filters
- chunk_size: int, number of rows per batch (default 1000)
- ensure: bool, create missing columns automatically
- types: dict, SQLAlchemy types for columns
"""Insert or update records based on key matching.
class Table:
def upsert(self, row, keys, ensure=None, types=None):
"""
An UPSERT is a smart combination of insert and update.
Parameters:
- row: dict, data to insert or update
- keys: list, columns to check for existing records
- ensure: bool, create missing columns automatically
- types: dict, SQLAlchemy types for columns
Returns:
Primary key of inserted row or True for updates
"""
def upsert_many(self, rows, keys, chunk_size=1000, ensure=None, types=None):
"""
Multiple upsert operations.
Parameters:
- rows: list of dict, data to upsert
- keys: list, columns to check for existing records
- chunk_size: int, processing batch size (default 1000)
- ensure: bool, create missing columns automatically
- types: dict, SQLAlchemy types for columns
"""Query and retrieve data with flexible filtering, ordering, and pagination.
class Table:
def find(self, *_clauses, **kwargs):
"""
Perform a simple search on the table.
Parameters:
- **kwargs: column filters for equality matching
- _limit: int, maximum number of rows to return
- _offset: int, number of rows to skip
- order_by: str or list, columns to sort by (prefix with '-' for desc)
- _streamed: bool, use streaming for large result sets
- _step: int, fetch step size (default 1000)
Additional filter operators via dict values:
- {'gt': value} or {'>': value}: greater than
- {'lt': value} or {'<': value}: less than
- {'gte': value} or {'>=': value}: greater than or equal
- {'lte': value} or {'<=': value}: less than or equal
- {'like': value}: SQL LIKE pattern matching
- {'ilike': value}: case-insensitive LIKE
- {'in': [values]}: value in list
- {'between': [start, end]}: value between range
Returns:
ResultIter: Iterator over matching rows
"""
def find_one(self, *args, **kwargs):
"""
Get a single result from the table.
Parameters: Same as find()
Returns:
dict-like row object or None
"""
def count(self, *_clauses, **kwargs):
"""
Return the count of results for the given filter set.
Parameters: Same as find() but excludes _limit/_offset
Returns:
int: Number of matching rows
"""
def distinct(self, *args, **_filter):
"""
Return all unique (distinct) values for the given columns.
Parameters:
- *args: column names to get distinct values for
- **_filter: additional filters to apply
Returns:
Iterator over unique combinations
"""
# Legacy alias
all = find
Remove records from the table with flexible filtering.
class Table:
def delete(self, *clauses, **filters):
"""
Delete rows from the table.
Parameters:
- **filters: column-based equality filters
- *clauses: additional SQLAlchemy clauses
Returns:
bool: True if any rows were deleted
"""Manage table structure with column operations and type handling.
class Table:
def has_column(self, column):
"""
Check if a column with the given name exists on this table.
Parameters:
- column: str, column name
Returns:
bool: True if column exists
"""
def create_column(self, name, type, **kwargs):
"""
Create a new column of a specified type.
Parameters:
- name: str, column name
- type: SQLAlchemy type, column data type
- **kwargs: additional Column constructor arguments
"""
def create_column_by_example(self, name, value):
"""
Create a new column with a type appropriate for the example value.
Parameters:
- name: str, column name
- value: sample value for type inference
"""
def drop_column(self, name):
"""
Drop the column (not supported on SQLite).
Parameters:
- name: str, column name to drop
"""
def drop(self):
"""Drop the table from the database."""Create and manage database indexes for query performance.
class Table:
def has_index(self, columns):
"""
Check if an index exists to cover the given columns.
Parameters:
- columns: str or list, column names
Returns:
bool: True if suitable index exists
"""
def create_index(self, columns, name=None, **kw):
"""
Create an index to speed up queries on a table.
Parameters:
- columns: str or list, column names to index
- name: str, index name (auto-generated if None)
- **kw: additional Index constructor arguments
"""Access table metadata and structure information.
class Table:
@property
def exists(self):
"""Check if the table currently exists in the database."""
@property
def columns(self):
"""Get a list of all column names that exist in the table."""
@property
def table(self):
"""Get a reference to the SQLAlchemy table object."""
def __len__(self):
"""Return the number of rows in the table."""
def __iter__(self):
"""Return all rows of the table as simple dictionaries."""import dataset
db = dataset.connect('sqlite:///example.db')
table = db['users']
# Insert data
user_id = table.insert({'name': 'John Doe', 'email': 'john@example.com', 'age': 30})
print(f"Inserted user with ID: {user_id}")
# Insert multiple records
users = [
{'name': 'Jane Smith', 'email': 'jane@example.com', 'age': 25},
{'name': 'Bob Johnson', 'email': 'bob@example.com', 'age': 35}
]
table.insert_many(users)
# Find records
for user in table.find(age={'gt': 25}):
print(f"{user['name']} is {user['age']} years old")
# Find single record
user = table.find_one(name='John Doe')
if user:
print(f"Found: {user['email']}")
# Update records
table.update({'age': 31}, ['name'], name='John Doe')
# Upsert (insert or update)
table.upsert({'name': 'Alice Wilson', 'email': 'alice@example.com', 'age': 28}, ['name'])
# Delete records
table.delete(age={'lt': 25})
# Count records
total_users = table.count()
adult_users = table.count(age={'gte': 18})
# Multiple conditions
results = table.find(age={'gte': 18}, city='New York')
# Range queries
results = table.find(age={'between': [25, 35]})
# Pattern matching
results = table.find(email={'like': '%@gmail.com'})
# List membership
results = table.find(city={'in': ['New York', 'Boston', 'Chicago']})
# Complex conditions
results = table.find(
age={'gte': 18},
salary={'gt': 50000},
department={'in': ['Engineering', 'Sales']}
)
# Ordering results
results = table.find(order_by='name') # Ascending
results = table.find(order_by='-age') # Descending
results = table.find(order_by=['department', '-salary']) # Multiple columns
# Pagination
page_1 = table.find(_limit=10, _offset=0)
page_2 = table.find(_limit=10, _offset=10)
# Check table structure
print(f"Table exists: {table.exists}")
print(f"Columns: {table.columns}")
print(f"Row count: {len(table)}")
# Column management
if not table.has_column('created_at'):
table.create_column('created_at', db.types.datetime)
# Create column by example
table.create_column_by_example('score', 95.5) # Creates FLOAT column
# Custom column types with constraints
table.create_column('status', db.types.string(20), nullable=False, default='active')
# Index creation
table.create_index(['email']) # Single column
table.create_index(['department', 'salary']) # Composite index
# Check for index
if table.has_index(['email']):
print("Email column is indexed")# Explicit type specification
table.insert(
{'name': 'Test User', 'data': {'key': 'value'}},
types={'data': db.types.json} # Force JSON type
)
# Automatic type inference handles:
# - int/float -> appropriate numeric types
# - str -> text type
# - dict/list -> JSON type
# - datetime/date objects -> datetime/date types
# - bool -> boolean type
# Custom row types
import dataset
from collections import namedtuple
UserRow = namedtuple('User', ['id', 'name', 'email'])
db = dataset.connect(row_type=UserRow)
# Bulk operations for better performance
with db: # Use transaction
for i in range(10000):
table.insert({'name': f'User {i}', 'value': i})
# Chunked operations (alternative approach)
rows = [{'name': f'User {i}', 'value': i} for i in range(10000)]
table.insert_many(rows, chunk_size=1000)
# Streaming for large result sets
for row in table.find(_streamed=True, _step=100):
process_row(row)
# Index frequently queried columns
table.create_index(['email'])
table.create_index(['created_at'])
Install with Tessl CLI
npx tessl i tessl/pypi-dataset