A Python package for manipulating 2-dimensional tabular data structures, with an emphasis on speed and big-data support.
—
The Frame class is datatable's main data structure for representing and manipulating 2-dimensional tabular data with high-performance columnar storage.
It is the primary data structure for tabular data, using column-oriented storage and supporting various data types and high-performance operations.
class Frame:
    """Two-dimensional table of data stored column by column.

    This is an interface summary: every member below is a stub whose body
    consists only of its docstring.
    """

    def __init__(self, data=None, *, names=None, stypes=None,
                 stype=None, types=None, type=None):
        """Create a new Frame from various data sources.

        Args:
            data: Data source (dict, list, numpy array, pandas DataFrame,
                etc.).
            names: Column names (list of strings).
            stypes: Column storage types (list of stype objects).
            stype: Single stype for all columns.
            types: Alias for ``stypes``.
            type: Alias for ``stype``.
        """

    # Properties

    @property
    def shape(self) -> tuple:
        """``(nrows, ncols)`` tuple describing the Frame dimensions."""

    @property
    def names(self) -> tuple:
        """Column names as a tuple of strings."""

    @property
    def stypes(self) -> tuple:
        """Column storage types as a tuple of stype objects."""

    @property
    def ltypes(self) -> tuple:
        """Column logical types as a tuple of ltype objects."""

    @property
    def nrows(self) -> int:
        """Number of rows."""

    @property
    def ncols(self) -> int:
        """Number of columns."""

    # Data access and manipulation

    def __getitem__(self, key):
        """Select rows and/or columns using various indexing methods."""

    def __setitem__(self, key, value):
        """Update or add columns and rows."""

    def __len__(self) -> int:
        """Number of rows in the Frame."""

    # Conversion methods

    def to_pandas(self) -> 'pandas.DataFrame':
        """Convert to a pandas DataFrame."""

    def to_numpy(self) -> 'numpy.ndarray':
        """Convert to a numpy array."""

    def to_dict(self) -> dict:
        """Convert to a dictionary."""

    def to_list(self) -> list:
        """Convert to a list of lists."""

    def to_csv(self, file=None, **kwargs):
        """Write the Frame to a CSV file or string."""

    # Display methods

    def head(self, n=10) -> 'Frame':
        """Return the first ``n`` rows."""

    def tail(self, n=10) -> 'Frame':
        """Return the last ``n`` rows."""

    def view(self, start_row=None, end_row=None):
        """Display the Frame in a terminal or notebook."""

    # Statistical methods

    def describe(self) -> 'Frame':
        """Generate descriptive statistics."""

    def nunique(self) -> 'Frame':
        """Count unique values in each column."""

    def countna(self) -> 'Frame':
        """Count missing values in each column."""

    # Data manipulation

    def copy(self, deep=True) -> 'Frame':
        """Create a copy of the Frame."""

    def delete(self, rows=None, cols=None):
        """Delete the specified rows and/or columns."""

    def sort(self, *cols) -> 'Frame':
        """Sort the Frame by the specified columns."""

    def unique(self, *cols) -> 'Frame':
        """Return unique rows based on the specified columns."""

    def group_by(self, *cols):
        """Group the Frame by the specified columns."""

    # Export methods

    def export_names(self) -> tuple:
        """Export column names."""

    def export_stypes(self) -> tuple:
        """Export column storage types."""


import datatable as dt
# From dictionary
DT = dt.Frame({
    'A': [1, 2, 3, 4, 5],
    'B': ['a', 'b', 'c', 'd', 'e'],
    'C': [1.1, 2.2, 3.3, 4.4, 5.5],
})

# From list of lists: each inner list initializes one COLUMN, so the data
# is supplied column by column (A, then B, then C) rather than row by row.
DT = dt.Frame([[1, 2, 3], ['a', 'b', 'c'], [1.1, 2.2, 3.3]],
              names=['A', 'B', 'C'])

# From numpy array
import numpy as np
arr = np.random.rand(1000, 5)
DT = dt.Frame(arr)

# From pandas DataFrame
import pandas as pd
pdf = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})
DT = dt.Frame(pdf)

# Empty Frame with specified structure
DT = dt.Frame(names=['A', 'B', 'C'],
              stypes=[dt.int64, dt.str64, dt.float64])

# With type specification
DT = dt.Frame([1, 2, 3, 4, 5], stype=dt.float32)

# Column selection
from datatable import f  # the `f` object used by the selectors below

DT[:, 'A']                # Select column A
DT[:, ['A', 'B']]         # Select multiple columns
DT[:, f.A]                # Select using f object
DT[:, f[:]]               # Select all columns

# Row selection
DT[0, :]                  # First row
DT[0:5, :]                # First 5 rows
DT[-1, :]                 # Last row
DT[f.A > 2, :]            # Conditional selection

# Combined selection
DT[f.A > 2, ['B', 'C']]   # Filter rows and select columns
DT[0:10, f.A:f.C]         # Slice rows and columns

# Boolean indexing
mask = DT[:, f.A > dt.mean(f.A)]
DT[mask, :]

# Basic properties
# One print call with a newline separator emits each value on its own line.
print(
    DT.shape,   # (nrows, ncols)
    DT.names,   # Column names
    DT.stypes,  # Storage types
    DT.nrows,   # Number of rows
    DT.ncols,   # Number of columns
    sep="\n",
)

# Data inspection
DT.head()      # First 10 rows (the default n)
DT.tail(5)     # Last 5 rows
DT.describe()  # Summary statistics
DT.nunique()   # Unique value counts
DT.countna()   # Missing value counts

# Display
DT.view()      # Interactive view
print(DT)  # String representation


class Type:
    """Type system helper for datatable operations"""


class FExpr:
    """Expression object representing column operations and transformations"""


class Namespace:
    """Namespace object for organizing column references and operations"""


# Install with Tessl CLI
#     npx tessl i tessl/pypi-datatable