Toolkit for Python-based database access that makes reading and writing data in databases as simple as working with JSON files
npx @tessl/cli install tessl/pypi-dataset@1.6.0

Dataset is a Python toolkit that makes reading and writing data in databases as simple as working with JSON files. It provides automatic schema creation, upsert functionality, and simple query helpers with support for SQLite, PostgreSQL, and MySQL databases. Built on SQLAlchemy, dataset abstracts away complex SQL operations while providing powerful data management capabilities.
pip install dataset

import dataset

Common patterns:

from dataset import connect, Database, Table

For chunked operations:

from dataset import chunked
# Then use: chunked.ChunkedInsert, chunked.ChunkedUpdate

import dataset
# Connect to database (defaults to in-memory SQLite)
db = dataset.connect('sqlite:///mydatabase.db')
# Get a table (creates if doesn't exist)
table = db['people']
# Insert data (automatically creates columns)
table.insert({'name': 'John Doe', 'age': 30, 'city': 'New York'})
table.insert({'name': 'Jane Smith', 'age': 25, 'city': 'Boston'})
# Query data
for row in table.find(city='New York'):
    print(row['name'], row['age'])
# Find single record
person = table.find_one(name='John Doe')
print(person)
# Update records
table.update({'name': 'John Doe', 'age': 31}, ['name'])
# Use transactions
with db:
    table.insert({'name': 'Bob Johnson', 'age': 28})
    table.upsert({'name': 'John Doe', 'age': 32}, ['name'])
# Close connection
db.close()

Dataset follows a layered architecture built on SQLAlchemy: the Database class manages connections, transactions, and raw SQL; the Table class handles schema and CRUD operations; and the chunked helpers build on tables for batch processing. This design enables automatic schema evolution, simplified data operations, and seamless database portability while maintaining full SQLAlchemy power for complex queries.
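For example, raw SQL can be run through Database.query() whenever the simple helpers are not enough. A minimal sketch, assuming the 'people' table from the quickstart above already exists:

import dataset

db = dataset.connect('sqlite:///mydatabase.db')

# Aggregate with plain SQL; each result row is a dict-like mapping.
result = db.query('SELECT city, COUNT(*) AS num FROM people GROUP BY city')
for row in result:
    print(row['city'], row['num'])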
Core database connection management, table access, transaction control, and raw SQL query execution. Provides the foundation for all dataset operations.
def connect(url=None, schema=None, engine_kwargs=None, ensure_schema=True,
            row_type=OrderedDict, sqlite_wal_mode=True, on_connect_statements=None):
    """Opens a new connection to a database."""

class Database:
    def __init__(self, url, **kwargs): ...
    def get_table(self, table_name, **kwargs): ...
    def query(self, query, *args, **kwargs): ...
    def begin(self): ...
    def commit(self): ...
    def rollback(self): ...
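A short sketch of explicit connection and transaction control (illustrative only; the table and column names are made up). Using the database as a context manager, as in the quickstart, does the same begin/commit/rollback dance implicitly:

import dataset

db = dataset.connect('sqlite:///mydatabase.db')
table = db.get_table('events')  # equivalent to db['events']

# Manual transaction control around a batch of writes.
db.begin()
try:
    table.insert({'kind': 'signup', 'count': 1})
    db.commit()
except Exception:
    db.rollback()
    raise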
Complete table manipulation including CRUD operations, schema management, indexing, and advanced querying capabilities with automatic column creation and type inference.

class Table:
    def insert(self, row, ensure=None, types=None): ...
    def insert_many(self, rows, chunk_size=1000, ensure=None, types=None): ...
    def find(self, **kwargs): ...
    def find_one(self, **kwargs): ...
    def update(self, row, keys, ensure=None, types=None): ...
    def upsert(self, row, keys, ensure=None, types=None): ...
    def delete(self, **filters): ...
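The sketch below exercises the main table operations with made-up values; it reuses the 'people' table from the quickstart and passes the ordering and limit keywords that find() accepts in dataset:

import dataset

db = dataset.connect('sqlite:///mydatabase.db')
table = db['people']

# Batch insert; missing columns are created automatically.
table.insert_many([
    {'name': 'Ada', 'age': 36, 'city': 'London'},
    {'name': 'Grace', 'age': 45, 'city': 'Arlington'},
])

# upsert inserts or updates depending on whether the key already exists.
table.upsert({'name': 'Ada', 'age': 37}, ['name'])

# find supports ordering and limits via keyword arguments.
for row in table.find(order_by='-age', _limit=5):
    print(row['name'], row['age'])

# delete removes all rows matching the filters.
table.delete(city='Arlington')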
High-performance batch processing for large datasets using context managers to automatically handle chunked inserts and updates with configurable batch sizes and callback support.

class ChunkedInsert:
    def __init__(self, table, chunksize=1000, callback=None): ...
    def insert(self, item): ...

class ChunkedUpdate:
    def __init__(self, table, keys, chunksize=1000, callback=None): ...
    def update(self, item): ...
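As the description above notes, both helpers are used as context managers that flush their buffered rows when the block exits. A sketch with made-up table and column names:

import dataset
from dataset import chunked

db = dataset.connect('sqlite:///mydatabase.db')
table = db['measurements']

# Rows are buffered and written in batches of `chunksize`.
with chunked.ChunkedInsert(table, chunksize=500) as inserter:
    for i in range(2000):
        inserter.insert({'sensor': 'a', 'value': i})

# Queued updates are applied when the context manager exits.
with chunked.ChunkedUpdate(table, keys=['id']) as updater:
    updater.update({'id': 1, 'value': -1})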
"""SQLAlchemy type holder and utilities."""
integer = Integer # SQLAlchemy Integer type
string = Unicode # SQLAlchemy Unicode type
text = UnicodeText # SQLAlchemy UnicodeText type
float = Float # SQLAlchemy Float type
bigint = BigInteger # SQLAlchemy BigInteger type
boolean = Boolean # SQLAlchemy Boolean type
date = Date # SQLAlchemy Date type
datetime = DateTime # SQLAlchemy DateTime type
def __init__(self, is_postgres=None):
"""Initialize type system with database-specific settings."""
self.json = JSONB if is_postgres else JSON
def guess(self, sample):
"""
Given a sample value, guess the appropriate column type.
Parameters:
- sample: any, sample value to infer type from
Returns:
SQLAlchemy type appropriate for the sample
"""
class DatasetException(Exception):
    """Base exception class for dataset errors."""

# Default row container type (can be customized via row_type parameter)
from collections import OrderedDict
row_type = OrderedDict
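Because row_type is just the mapping class used to build result rows, any dict-like class can be passed to connect. A minimal sketch:

import dataset

# Return plain dicts instead of OrderedDict for every query result.
db = dataset.connect('sqlite:///:memory:', row_type=dict)
db['people'].insert({'name': 'Ann'})
print(db['people'].find_one(name='Ann'))  # a plain dict, e.g. {'id': 1, 'name': 'Ann'}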