CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-dataset

Toolkit for Python-based database access that makes reading and writing data in databases as simple as working with JSON files

Pending
Overview
Eval results
Files

docs/table-operations.md

Table Operations

Complete table manipulation including CRUD operations, schema management, indexing, and advanced querying capabilities. The Table class provides automatic column creation, type inference, and flexible data operations.

Capabilities

Data Insertion

Insert single or multiple rows with automatic schema creation and type inference.

class Table:
    def insert(self, row, ensure=None, types=None):
        """
        Add a row dict by inserting it into the table.
        
        Parameters:
        - row: dict, data to insert
        - ensure: bool, create missing columns automatically (default: db.ensure_schema)
        - types: dict, SQLAlchemy types for columns {column_name: type}
        
        Returns:
        Primary key of inserted row or True
        """
        
    def insert_ignore(self, row, keys, ensure=None, types=None):
        """
        Add a row dict into the table if the row does not exist.
        
        Parameters:
        - row: dict, data to insert
        - keys: list, columns to check for existing rows
        - ensure: bool, create missing columns automatically
        - types: dict, SQLAlchemy types for columns
        
        Returns:
        Primary key of inserted row or False if exists
        """
        
    def insert_many(self, rows, chunk_size=1000, ensure=None, types=None):
        """
        Add many rows at a time.
        
        Parameters:
        - rows: list of dict, data to insert
        - chunk_size: int, number of rows per batch (default 1000)
        - ensure: bool, create missing columns automatically
        - types: dict, SQLAlchemy types for columns
        """

Data Updates

Update existing records with flexible filtering and bulk operations.

class Table:
    def update(self, row, keys, ensure=None, types=None, return_count=False):
        """
        Update rows in the table.
        
        Parameters:
        - row: dict, data values to update
        - keys: list, column names to use as filters
        - ensure: bool, create missing columns automatically
        - types: dict, SQLAlchemy types for columns
        - return_count: bool, return count of affected rows
        
        Returns:
        Number of updated rows (if supported by database)
        """
        
    def update_many(self, rows, keys, chunk_size=1000, ensure=None, types=None):
        """
        Update many rows in the table at a time.
        
        Parameters:
        - rows: list of dict, data to update
        - keys: list, column names to use as filters
        - chunk_size: int, number of rows per batch (default 1000)
        - ensure: bool, create missing columns automatically
        - types: dict, SQLAlchemy types for columns
        """

Upsert Operations

Insert or update records based on key matching.

class Table:
    def upsert(self, row, keys, ensure=None, types=None):
        """
        An UPSERT is a smart combination of insert and update.
        
        Parameters:
        - row: dict, data to insert or update
        - keys: list, columns to check for existing records
        - ensure: bool, create missing columns automatically
        - types: dict, SQLAlchemy types for columns
        
        Returns:
        Primary key of inserted row or True for updates
        """
        
    def upsert_many(self, rows, keys, chunk_size=1000, ensure=None, types=None):
        """
        Multiple upsert operations.
        
        Parameters:
        - rows: list of dict, data to upsert
        - keys: list, columns to check for existing records
        - chunk_size: int, processing batch size (default 1000)
        - ensure: bool, create missing columns automatically
        - types: dict, SQLAlchemy types for columns
        """

Data Querying

Query and retrieve data with flexible filtering, ordering, and pagination.

class Table:
    def find(self, *_clauses, **kwargs):
        """
        Perform a simple search on the table.
        
        Parameters:
        - **kwargs: column filters for equality matching
        - _limit: int, maximum number of rows to return
        - _offset: int, number of rows to skip
        - order_by: str or list, columns to sort by (prefix with '-' for desc)
        - _streamed: bool, use streaming for large result sets
        - _step: int, fetch step size (default 1000)
        
        Additional filter operators via dict values:
        - {'gt': value} or {'>': value}: greater than
        - {'lt': value} or {'<': value}: less than
        - {'gte': value} or {'>=': value}: greater than or equal
        - {'lte': value} or {'<=': value}: less than or equal
        - {'like': value}: SQL LIKE pattern matching
        - {'ilike': value}: case-insensitive LIKE
        - {'in': [values]}: value in list
        - {'between': [start, end]}: value between range
        
        Returns:
        ResultIter: Iterator over matching rows
        """
        
    def find_one(self, *args, **kwargs):
        """
        Get a single result from the table.
        
        Parameters: Same as find()
        
        Returns:
        dict-like row object or None
        """
        
    def count(self, *_clauses, **kwargs):
        """
        Return the count of results for the given filter set.
        
        Parameters: Same as find() but excludes _limit/_offset
        
        Returns:
        int: Number of matching rows
        """
        
    def distinct(self, *args, **_filter):
        """
        Return all unique (distinct) values for the given columns.
        
        Parameters:
        - *args: column names to get distinct values for
        - **_filter: additional filters to apply
        
        Returns:
        Iterator over unique combinations
        """
        
    # Legacy alias
    all = find

Data Deletion

Remove records from the table with flexible filtering.

class Table:
    def delete(self, *clauses, **filters):
        """
        Delete rows from the table.
        
        Parameters:
        - **filters: column-based equality filters
        - *clauses: additional SQLAlchemy clauses
        
        Returns:
        bool: True if any rows were deleted
        """

Schema Management

Manage table structure with column operations and type handling.

class Table:
    def has_column(self, column):
        """
        Check if a column with the given name exists on this table.
        
        Parameters:
        - column: str, column name
        
        Returns:
        bool: True if column exists
        """
        
    def create_column(self, name, type, **kwargs):
        """
        Create a new column of a specified type.
        
        Parameters:
        - name: str, column name
        - type: SQLAlchemy type, column data type
        - **kwargs: additional Column constructor arguments
        """
        
    def create_column_by_example(self, name, value):
        """
        Create a new column with a type appropriate for the example value.
        
        Parameters:
        - name: str, column name
        - value: sample value for type inference
        """
        
    def drop_column(self, name):
        """
        Drop the column (not supported on SQLite).
        
        Parameters:
        - name: str, column name to drop
        """
        
    def drop(self):
        """Drop the table from the database."""

Index Management

Create and manage database indexes for query performance.

class Table:
    def has_index(self, columns):
        """
        Check if an index exists to cover the given columns.
        
        Parameters:
        - columns: str or list, column names
        
        Returns:
        bool: True if suitable index exists
        """
        
    def create_index(self, columns, name=None, **kw):
        """
        Create an index to speed up queries on a table.
        
        Parameters:
        - columns: str or list, column names to index
        - name: str, index name (auto-generated if None)
        - **kw: additional Index constructor arguments
        """

Table Properties

Access table metadata and structure information.

class Table:
    @property
    def exists(self):
        """Check if the table currently exists in the database."""
        
    @property
    def columns(self):
        """Get a list of all column names that exist in the table."""
        
    @property
    def table(self):
        """Get a reference to the SQLAlchemy table object."""
        
    def __len__(self):
        """Return the number of rows in the table."""
        
    def __iter__(self):
        """Return all rows of the table as simple dictionaries."""

Usage Examples

Basic CRUD Operations

import dataset

db = dataset.connect('sqlite:///example.db')
table = db['users']

# Insert data
user_id = table.insert({'name': 'John Doe', 'email': 'john@example.com', 'age': 30})
print(f"Inserted user with ID: {user_id}")

# Insert multiple records
users = [
    {'name': 'Jane Smith', 'email': 'jane@example.com', 'age': 25},
    {'name': 'Bob Johnson', 'email': 'bob@example.com', 'age': 35}
]
table.insert_many(users)

# Find records
for user in table.find(age={'gt': 25}):
    print(f"{user['name']} is {user['age']} years old")

# Find single record
user = table.find_one(name='John Doe')
if user:
    print(f"Found: {user['email']}")

# Update records (the row dict must contain the key column(s) used as the filter)
table.update({'name': 'John Doe', 'age': 31}, ['name'])

# Upsert (insert or update)
table.upsert({'name': 'Alice Wilson', 'email': 'alice@example.com', 'age': 28}, ['name'])

# Delete records
table.delete(age={'lt': 25})

# Count records
total_users = table.count()
adult_users = table.count(age={'gte': 18})

Advanced Filtering

# Multiple conditions
results = table.find(age={'gte': 18}, city='New York')

# Range queries
results = table.find(age={'between': [25, 35]})

# Pattern matching
results = table.find(email={'like': '%@gmail.com'})

# List membership
results = table.find(city={'in': ['New York', 'Boston', 'Chicago']})

# Complex conditions
results = table.find(
    age={'gte': 18},
    salary={'gt': 50000},
    department={'in': ['Engineering', 'Sales']}
)

# Ordering results
results = table.find(order_by='name')  # Ascending
results = table.find(order_by='-age')  # Descending
results = table.find(order_by=['department', '-salary'])  # Multiple columns

# Pagination
page_1 = table.find(_limit=10, _offset=0)
page_2 = table.find(_limit=10, _offset=10)

Schema Operations

# Check table structure
print(f"Table exists: {table.exists}")
print(f"Columns: {table.columns}")
print(f"Row count: {len(table)}")

# Column management
if not table.has_column('created_at'):
    table.create_column('created_at', db.types.datetime)

# Create column by example
table.create_column_by_example('score', 95.5)  # Creates FLOAT column

# Custom column types with constraints
table.create_column('status', db.types.string(20), nullable=False, default='active')

# Index creation
table.create_index(['email'])  # Single column
table.create_index(['department', 'salary'])  # Composite index

# Check for index
if table.has_index(['email']):
    print("Email column is indexed")

Type Handling

# Explicit type specification
table.insert(
    {'name': 'Test User', 'data': {'key': 'value'}},
    types={'data': db.types.json}  # Force JSON type
)

# Automatic type inference handles:
# - int/float -> appropriate numeric types
# - str -> text type  
# - dict/list -> JSON type
# - datetime/date objects -> datetime/date types
# - bool -> boolean type

# Custom row types
import dataset
from collections import namedtuple

UserRow = namedtuple('User', ['id', 'name', 'email'])
db = dataset.connect(row_type=UserRow)

Performance Optimization

# Bulk operations for better performance
with db:  # Use transaction
    for i in range(10000):
        table.insert({'name': f'User {i}', 'value': i})

# Chunked operations (alternative approach)
rows = [{'name': f'User {i}', 'value': i} for i in range(10000)]
table.insert_many(rows, chunk_size=1000)

# Streaming for large result sets
for row in table.find(_streamed=True, _step=100):
    process_row(row)

# Index frequently queried columns
table.create_index(['email'])
table.create_index(['created_at'])

Install with Tessl CLI

npx tessl i tessl/pypi-dataset

docs

chunked-operations.md

database-operations.md

index.md

table-operations.md

tile.json