tessl/pypi-datatable

Python package for manipulating 2-dimensional tabular data structures with emphasis on speed and big data support

—

Pending

Overview

Eval results

Files

Core Data Structures

Name: tessl/pypi-datatable
Author: tessl

The Frame class is datatable's main data structure for representing and manipulating 2-dimensional tabular data with high-performance columnar storage.

Capabilities

Frame Class

The primary data structure for tabular data with column-oriented storage, supporting various data types and high-performance operations.

class Frame:
    def __init__(
        self,
        data=None,
        *,
        names=None,
        stypes=None,
        stype=None,
        types=None,
        type=None,
    ):
        """
        Build a new Frame out of one of several kinds of data source.

        Every argument other than ``data`` is keyword-only.

        Parameters:
        - data: Source of the data (dict, list, numpy array, pandas DataFrame, etc.)
        - names: List of strings used as the column names
        - stypes: List of stype objects giving the storage type of each column
        - stype: One stype applied to all columns
        - types: Alias for stypes
        - type: Alias for stype
        """

    # ---- Properties ----
    @property
    def shape(self) -> tuple:
        """Dimensions of the Frame as an (nrows, ncols) tuple"""

    @property
    def names(self) -> tuple:
        """Tuple of strings holding the column names"""

    @property
    def stypes(self) -> tuple:
        """Tuple of stype objects: the storage type of each column"""

    @property
    def ltypes(self) -> tuple:
        """Tuple of ltype objects: the logical type of each column"""

    @property
    def nrows(self) -> int:
        """Row count"""

    @property
    def ncols(self) -> int:
        """Column count"""

    # ---- Data access and manipulation ----
    def __getitem__(self, key):
        """Pick rows and/or columns; several indexing styles are accepted"""

    def __setitem__(self, key, value):
        """Add or update columns and rows"""

    def __len__(self) -> int:
        """
        Number of rows in the Frame

        NOTE(review): upstream datatable documents len(frame) as the number
        of COLUMNS (same as ncols) - confirm which is intended here.
        """

    # ---- Conversion methods ----
    def to_pandas(self) -> 'pandas.DataFrame':
        """Return the data as a pandas DataFrame"""

    def to_numpy(self) -> 'numpy.ndarray':
        """Return the data as a numpy array"""

    def to_dict(self) -> dict:
        """Return the data as a dictionary"""

    def to_list(self) -> list:
        """
        Return the data as a list of lists

        Presumably one inner list per column - TODO confirm.
        """

    def to_csv(self, file=None, **kwargs):
        """
        Write the Frame as CSV, either to a file or to a string

        NOTE(review): upstream datatable names the first parameter `path`
        rather than `file` - confirm before relying on the keyword form.
        """

    # ---- Display methods ----
    def head(self, n=10) -> 'Frame':
        """Return the first n rows (n defaults to 10)"""

    def tail(self, n=10) -> 'Frame':
        """Return the last n rows (n defaults to 10)"""

    def view(self, start_row=None, end_row=None):
        """Show the Frame in a terminal or notebook"""

    # ---- Statistical methods ----
    def describe(self) -> 'Frame':
        """Produce descriptive statistics"""

    def nunique(self) -> 'Frame':
        """Per-column count of unique values"""

    def countna(self) -> 'Frame':
        """Per-column count of missing values"""

    # ---- Data manipulation ----
    def copy(self, deep=True) -> 'Frame':
        """
        Make a copy of the Frame

        NOTE(review): upstream datatable documents the default as
        deep=False - confirm the default shown in this stub.
        """

    def delete(self, rows=None, cols=None):
        """Remove the given rows and/or columns"""

    def sort(self, *cols) -> 'Frame':
        """Return the Frame sorted by the given columns"""

    def unique(self, *cols) -> 'Frame':
        """Return the unique rows, judged on the given columns"""

    def group_by(self, *cols):
        """Group the Frame by the given columns"""

    # ---- Export methods ----
    def export_names(self) -> tuple:
        """Export the column names"""

    def export_stypes(self) -> tuple:
        """Export the column storage types"""

Frame Creation Examples

import datatable as dt

# From dictionary: one entry per column
DT = dt.Frame({
    'A': [1, 2, 3, 4, 5],
    'B': ['a', 'b', 'c', 'd', 'e'],
    'C': [1.1, 2.2, 3.3, 4.4, 5.5]
})

# From list of lists: each inner list is one COLUMN, so every inner list
# must hold values of a single type (a row-shaped list such as
# [1, 'a', 1.1] mixes int/str/float and cannot be turned into a column)
DT = dt.Frame([[1, 2, 3], ['a', 'b', 'c'], [1.1, 2.2, 3.3]],
              names=['A', 'B', 'C'])

# From numpy array
import numpy as np
arr = np.random.rand(1000, 5)
DT = dt.Frame(arr)

# From pandas DataFrame
import pandas as pd
pdf = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})
DT = dt.Frame(pdf)

# Empty Frame with specified structure: pass one empty list per column so
# that the number of names/stypes matches the number of columns created
DT = dt.Frame([[], [], []],
              names=['A', 'B', 'C'],
              stypes=[dt.int64, dt.str64, dt.float64])

# With type specification: a single stype applied to the whole Frame
DT = dt.Frame([1, 2, 3, 4, 5], stype=dt.float32)

Frame Indexing and Selection

# `f` is datatable's column-reference object; it must be imported before use.
# DT is assumed to be an existing Frame with columns A, B and C.
import datatable as dt
from datatable import f

# Column selection
DT[:, 'A']           # Select column A
DT[:, ['A', 'B']]    # Select multiple columns
DT[:, f.A]           # Select using f object
DT[:, f[:]]          # Select all columns

# Row selection
DT[0, :]             # First row
DT[0:5, :]           # First 5 rows
DT[-1, :]            # Last row
DT[f.A > 2, :]       # Conditional selection

# Combined selection
DT[f.A > 2, ['B', 'C']]    # Filter rows and select columns
# NOTE(review): upstream docs spell column ranges as DT[:, "A":"C"] or
# f["A":"C"]; confirm that the f.A:f.C slice form is accepted.
DT[0:10, f.A:f.C]          # Slice rows and columns

# Boolean indexing: build a one-column boolean Frame, then use it as the
# row selector
mask = DT[:, f.A > dt.mean(f.A)]
DT[mask, :]

Frame Properties and Inspection

# Basic properties (DT is assumed to be an existing Frame)
print(DT.shape)      # (nrows, ncols) tuple
print(DT.names)      # Tuple of column names
print(DT.stypes)     # Tuple of storage types, one per column
print(DT.nrows)      # Number of rows
print(DT.ncols)      # Number of columns

# Data inspection (each call returns a new Frame)
DT.head()            # First 10 rows (default n=10)
DT.tail(5)           # Last 5 rows
DT.describe()        # Summary statistics
DT.nunique()         # Unique value counts per column
DT.countna()         # Missing value counts per column

# Display
DT.view()            # Show the Frame in the terminal or notebook
print(DT)            # String representation

Types

Type Objects

class Type:
    """Helper from datatable's type system (empty placeholder in this listing)."""

class FExpr:
    """Expression object standing for operations and transformations on columns
    (empty placeholder in this listing)."""

class Namespace:
    """Namespace object that organizes column references and operations
    (empty placeholder in this listing)."""

Install with Tessl CLI