CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-daft

Distributed DataFrames for multimodal data, with a high-performance query engine, support for complex nested data structures, AI/ML operations, and seamless cloud storage integration.

Pending
Overview
Eval results
Files

docs/expressions.md

Expressions and Functions

Column operations and computational expressions for data transformation. Expressions provide type-safe, optimizable operations that can be applied to DataFrame columns with support for complex nested operations and function composition.

Capabilities

Core Expression Functions

Create and manipulate column expressions for DataFrame operations.

def col(name: str) -> Expression:
    """
    Reference a DataFrame column by name.

    The result is an Expression, so it can be combined with the operators
    and methods declared on Expression (e.g. ``col("price") * 1.1`` or
    ``col("name").str_upper()``).

    Parameters:
    - name: Column name to reference

    Returns:
    Expression: Column expression
    """

def lit(value: Any) -> Expression:
    """
    Create literal value expression.

    Wraps a plain Python value so it can be used where an Expression is
    expected (e.g. ``col("amount") + lit(100)``).

    Parameters:
    - value: Literal value (number, string, boolean, etc.)

    Returns:
    Expression: Literal expression
    """

def coalesce(*exprs: Expression) -> Expression:
    """
    Return first non-null value from expressions.

    Accepts any number of positional expressions; they are considered in the
    order in which they are passed.

    Parameters:
    - exprs: Expressions to evaluate in order

    Returns:
    Expression: Coalesced expression
    """

Conditional Expressions

Create conditional logic with when/otherwise patterns.

class Expression:
    """Conditional-logic methods of Expression (partial listing of the class)."""

    # NOTE(review): the usage examples in this document chain
    # ``when(...).then(...).when(...).then(...).otherwise(...)``, but no
    # ``then`` method is declared in this listing -- confirm against the API.

    def when(self, predicate: Expression) -> Expression:
        """
        Create conditional expression.

        Called on an existing expression to add a further condition to it.

        Parameters:
        - predicate: Boolean condition

        Returns:
        Expression: Conditional expression
        """

    def otherwise(self, expr: Expression) -> Expression:
        """
        Provide else clause for conditional expression.

        Parameters:
        - expr: Expression to use when condition is false

        Returns:
        Expression: Complete conditional expression
        """

def when(predicate: Expression) -> Expression:
    """
    Start conditional expression chain.

    Module-level entry point; the returned value is what the chained calls
    in the usage examples (``.otherwise(...)`` etc.) are invoked on.

    Parameters:
    - predicate: Boolean condition

    Returns:
    Expression: Conditional expression builder
    """

Complex Data Types

Work with arrays, lists, and nested structures.

def list_(*exprs: Expression) -> Expression:
    """
    Create list expression from multiple expressions.

    Takes any number of positional expressions, e.g.
    ``list_(col("item1"), col("item2"))``.

    Parameters:
    - exprs: Expressions to combine into list

    Returns:
    Expression: List expression
    """

def struct(**kwargs: Expression) -> Expression:
    """
    Create struct expression from named expressions.

    Fields are passed as keyword arguments, e.g.
    ``struct(name=col("name"), age=col("age"))``.

    Parameters:
    - kwargs: Named expressions for struct fields (keyword -> expression)

    Returns:
    Expression: Struct expression
    """

def element(n: int) -> Expression:
    """
    Extract element from array/list by index.

    NOTE(review): the signature takes only an index; which array/list the
    element is taken from is not visible here -- confirm against the API.

    Parameters:
    - n: Index to extract (0-based)

    Returns:
    Expression: Element extraction expression
    """

String Operations

String manipulation and text processing functions.

class Expression:
    """String-manipulation methods of Expression (partial listing of the class)."""

    def str_contains(self, pattern: str, regex: bool = False) -> Expression:
        """
        Check if string contains pattern.

        Parameters:
        - pattern: Pattern to search for
        - regex: Whether pattern is regular expression (default False)

        Returns:
        Expression: Boolean expression
        """

    def str_length(self) -> Expression:
        """
        Get string length.

        Returns:
        Expression: String length expression
        """

    def str_upper(self) -> Expression:
        """
        Convert string to uppercase.

        Returns:
        Expression: Uppercase string expression
        """

    def str_lower(self) -> Expression:
        """
        Convert string to lowercase.

        Returns:
        Expression: Lowercase string expression
        """

    def str_slice(self, start: int, end: Optional[int] = None) -> Expression:
        """
        Extract substring.

        NOTE(review): whether ``end`` is inclusive or exclusive is not stated
        in this listing -- confirm.

        Parameters:
        - start: Start index
        - end: End index (end of string if None, which is the default)

        Returns:
        Expression: Substring expression
        """

Mathematical Operations

Arithmetic and mathematical functions.

class Expression:
    """
    Arithmetic and mathematical methods of Expression (partial listing).

    The operator methods accept either another Expression or a plain value
    as the right-hand operand, and every method returns a new Expression.
    """

    def __add__(self, other: Union[Expression, Any]) -> Expression:
        """Addition operation (``self + other``)."""

    def __sub__(self, other: Union[Expression, Any]) -> Expression:
        """Subtraction operation (``self - other``)."""

    def __mul__(self, other: Union[Expression, Any]) -> Expression:
        """Multiplication operation (``self * other``)."""

    def __truediv__(self, other: Union[Expression, Any]) -> Expression:
        """Division operation (``self / other``)."""

    def __mod__(self, other: Union[Expression, Any]) -> Expression:
        """Modulo operation (``self % other``)."""

    def abs(self) -> Expression:
        """Absolute value."""

    def ceil(self) -> Expression:
        """Ceiling function."""

    def floor(self) -> Expression:
        """Floor function."""

    def round(self, decimals: int = 0) -> Expression:
        """Round to specified decimal places (``decimals`` defaults to 0)."""

    def sqrt(self) -> Expression:
        """Square root."""

    def sin(self) -> Expression:
        """Sine function."""

    def cos(self) -> Expression:
        """Cosine function."""

    def tan(self) -> Expression:
        """Tangent function."""

Comparison Operations

Comparison and logical operations.

class Expression:
    """
    Comparison and logical methods of Expression (partial listing).

    Every comparison is annotated as returning an Expression rather than a
    bool, so results are meant to be used as expressions (e.g. passed on to
    ``when(...)``).
    """

    def __eq__(self, other: Union[Expression, Any]) -> Expression:
        """Equality comparison (``self == other``)."""

    def __ne__(self, other: Union[Expression, Any]) -> Expression:
        """Inequality comparison (``self != other``)."""

    def __lt__(self, other: Union[Expression, Any]) -> Expression:
        """Less than comparison (``self < other``)."""

    def __le__(self, other: Union[Expression, Any]) -> Expression:
        """Less than or equal comparison (``self <= other``)."""

    def __gt__(self, other: Union[Expression, Any]) -> Expression:
        """Greater than comparison (``self > other``)."""

    def __ge__(self, other: Union[Expression, Any]) -> Expression:
        """Greater than or equal comparison (``self >= other``)."""

    def __and__(self, other: Expression) -> Expression:
        """Logical AND operation, spelled ``self & other`` (not ``and``)."""

    def __or__(self, other: Expression) -> Expression:
        """Logical OR operation, spelled ``self | other`` (not ``or``)."""

    def __invert__(self) -> Expression:
        """Logical NOT operation, spelled ``~self`` (not ``not``)."""

    def isin(self, values: List[Any]) -> Expression:
        """Check if value is in list (``values`` is a list of candidates)."""

    def is_null(self) -> Expression:
        """Check if value is null."""

    def is_not_null(self) -> Expression:
        """Check if value is not null."""

Type Operations

Type casting and validation.

class Expression:
    """Type-casting methods of Expression (partial listing of the class)."""

    def cast(self, dtype: DataType) -> Expression:
        """
        Cast expression to different data type.

        NOTE(review): behaviour on a failed conversion is not stated here;
        ``try_cast`` is the variant documented as yielding null on failure.

        Parameters:
        - dtype: Target data type

        Returns:
        Expression: Cast expression
        """

    def try_cast(self, dtype: DataType) -> Expression:
        """
        Attempt to cast, returning null on failure.

        Parameters:
        - dtype: Target data type

        Returns:
        Expression: Safe cast expression
        """

Aggregation Expressions

Create aggregation expressions for group operations.

class Expression:
    """
    Aggregation methods of Expression (partial listing of the class).

    Each method takes no arguments and returns a new Expression; the usage
    examples pass them to ``df.groupby(...).agg(...)``, e.g.
    ``col("price").mean().alias("avg_price")``.
    """

    def sum(self) -> Expression:
        """Sum aggregation."""

    def mean(self) -> Expression:
        """Mean aggregation."""

    def min(self) -> Expression:
        """Minimum aggregation."""

    def max(self) -> Expression:
        """Maximum aggregation."""

    def count(self) -> Expression:
        """Count aggregation."""

    def std(self) -> Expression:
        """Standard deviation aggregation."""

    def first(self) -> Expression:
        """First value aggregation."""

    def last(self) -> Expression:
        """Last value aggregation."""

    def list_agg(self) -> Expression:
        """Aggregate into list."""

DateTime Operations

Date and time manipulation functions.

def interval(value: int, unit: str) -> Expression:
    """
    Create time interval expression.

    The usage examples add the result to a datetime column, e.g.
    ``col("created_at") + interval(30, "days")``.

    Parameters:
    - value: Interval value (integer count of ``unit``)
    - unit: Time unit ('days', 'hours', 'minutes', 'seconds')

    Returns:
    Expression: Interval expression
    """

class Expression:
    """
    Date/time component accessors of Expression (partial listing).

    Each method takes no arguments and returns a new Expression.
    """

    def dt_year(self) -> Expression:
        """Extract year from datetime."""

    def dt_month(self) -> Expression:
        """Extract month from datetime."""

    def dt_day(self) -> Expression:
        """Extract day from datetime."""

    def dt_hour(self) -> Expression:
        """Extract hour from datetime."""

    def dt_minute(self) -> Expression:
        """Extract minute from datetime."""

    def dt_second(self) -> Expression:
        """Extract second from datetime."""

    def dt_date(self) -> Expression:
        """Extract date part from datetime."""

Window Functions

Window-based operations and rankings.

class Expression:
    """Window-application method of Expression (partial listing of the class)."""

    def over(self, window: Window) -> Expression:
        """
        Apply expression over window.

        Used in the examples both on ranking functions
        (``row_number().over(window)``) and on aggregations
        (``col("salary").sum().over(window)``).

        Parameters:
        - window: Window specification

        Returns:
        Expression: Windowed expression
        """

def row_number() -> Expression:
    """Row number within partition; combine with ``.over(window)`` to apply it."""

def rank() -> Expression:
    """Rank within partition; takes no arguments and returns an Expression."""

def dense_rank() -> Expression:
    """Dense rank within partition; takes no arguments and returns an Expression."""

class Window:
    """Window specification that is passed to ``Expression.over``."""

    def __init__(
        self,
        partition_by: Optional[List[Expression]] = None,
        order_by: Optional[List[Expression]] = None
    ):
        """
        Create window specification.

        Both arguments are optional and default to None.

        Parameters:
        - partition_by: Columns to partition by
        - order_by: Columns to order by within partition
        """

    def rows_between(self, start: int, end: int) -> "Window":
        """
        Define row-based frame boundaries.

        NOTE(review): the frame constants declared in this document
        (unbounded_preceding, unbounded_following, current_row) are typed as
        Expression while ``start``/``end`` here are int -- confirm whether
        the constants are accepted by this method.

        Parameters:
        - start: Start row offset (negative for preceding rows)
        - end: End row offset (positive for following rows)

        Returns:
        Window: Window with row frame specification
        """

    def range_between(self, start: Expression, end: Expression) -> "Window":
        """
        Define range-based frame boundaries.

        Unlike ``rows_between``, the boundaries are Expressions rather than
        integer row offsets.

        Parameters:
        - start: Start range value
        - end: End range value

        Returns:
        Window: Window with range frame specification
        """

# Window frame constants (declared as Expression values)
unbounded_preceding: Expression  # Frame boundary: unbounded preceding
unbounded_following: Expression  # Frame boundary: unbounded following
current_row: Expression          # Frame boundary: the current row

Built-in Functions

Additional utility functions for data processing.

def columns_sum(*cols: ColumnInputType) -> Expression:
    """Sum across multiple columns; each column is a name (str) or an Expression."""

def columns_mean(*cols: ColumnInputType) -> Expression:
    """Mean across multiple columns; each column is a name (str) or an Expression."""

def columns_min(*cols: ColumnInputType) -> Expression:
    """Minimum across multiple columns; each column is a name (str) or an Expression."""

def columns_max(*cols: ColumnInputType) -> Expression:
    """Maximum across multiple columns; each column is a name (str) or an Expression."""

def monotonically_increasing_id() -> Expression:
    """Generate monotonically increasing IDs; takes no arguments."""

# NOTE: this name is the same as Python's builtin ``format``; importing it
# unqualified hides the builtin in the importing module.
def format(template: str, *args: Expression) -> Expression:
    """
    Format string with expression arguments.

    NOTE(review): the placeholder syntax expected in ``template`` is not
    shown in this listing -- confirm against the API.

    Parameters:
    - template: Format string template
    - args: Expressions to format into template

    Returns:
    Expression: Formatted string expression
    """

Embedding Operations

Vector embedding operations for similarity calculations.

class Expression:
    """Embedding accessor of Expression (partial listing of the class)."""

    @property
    def embedding(self) -> "ExpressionEmbeddingNamespace":
        """
        Access embedding operations namespace.

        This is a property, so it is used without parentheses:
        ``expr.embedding.cosine_distance(other)``.
        """
    
class ExpressionEmbeddingNamespace:
    """Embedding operations, reached through the ``Expression.embedding`` property."""

    def cosine_distance(self, other: Expression) -> Expression:
        """
        Calculate cosine distance between embedding vectors.

        Parameters:
        - other: Another embedding expression to compare against

        Returns:
        Expression: Cosine distance (0.0 = identical, 2.0 = opposite)
        """

Usage Examples

Basic Expression Operations

from daft import col, lit, when

# Arithmetic operations
# NOTE: `.alias()` is a method call, so it binds tighter than `*` and `+`.
# The whole arithmetic expression must be parenthesized before aliasing;
# otherwise only the right-hand operand receives the alias.
df.select(
    (col("price") * col("quantity")).alias("total"),
    (col("price") * 1.1).alias("price_with_tax"),
    (col("amount") + lit(100)).alias("adjusted_amount"),
)

# String operations: each method chain is aliased to name the output column
df.select(
    col("name").str_upper().alias("name_upper"),
    col("email").str_contains("@gmail.com").alias("is_gmail"),
    col("description").str_length().alias("desc_length"),
)

Conditional Logic

# Conditional expressions
# NOTE(review): both chains below call `.then(...)`, which is not declared on
# Expression anywhere in this document's API listing -- confirm that the
# when/then/otherwise builder exists in the installed version.
df.select(
    # Two-way branch: "Adult" when age >= 18, otherwise "Minor"
    when(col("age") >= 18)
    .then(lit("Adult"))
    .otherwise(lit("Minor"))
    .alias("age_group"),
    
    # Multi-way branch: conditions are listed from the highest threshold down,
    # with "F" as the fallback
    when(col("score") >= 90).then(lit("A"))
    .when(col("score") >= 80).then(lit("B"))
    .when(col("score") >= 70).then(lit("C"))
    .otherwise(lit("F"))
    .alias("grade")
)

Complex Data Operations

from daft import list_, struct, element

# Working with arrays and structs
df.select(
    # Combine three columns into one list-valued column
    list_(col("item1"), col("item2"), col("item3")).alias("items"),
    # Build a struct whose field names are the keyword names
    struct(
        name=col("name"),
        age=col("age"),
        active=col("is_active")
    ).alias("person"),
    # NOTE(review): element(0) names no array column here, so which array the
    # first element is taken from is not visible -- confirm intended usage.
    element(0).alias("first_item")  # Extract first element from array
)

Aggregation with Expressions

# Per-category summary, built from a list of named aggregation expressions
category_metrics = [
    col("price").mean().alias("avg_price"),
    col("quantity").sum().alias("total_quantity"),
    # Revenue is computed per row first, then summed within the group
    (col("price") * col("quantity")).sum().alias("total_revenue"),
    col("name").count().alias("item_count"),
]

df.groupby("category").agg(*category_metrics)

DateTime Processing

from daft import interval

# Date/time operations, expressed as named column expressions
created_at = col("created_at")

date_columns = [
    created_at.dt_year().alias("year"),
    created_at.dt_month().alias("month"),
    # Shift the timestamp forward by a 30-day interval
    (created_at + interval(30, "days")).alias("future_date"),
    col("timestamp").dt_date().alias("date_only"),
]

df.select(*date_columns)

Window Functions

from daft.window import Window
from daft.functions import row_number, rank

# Window over each department, ordered by salary from highest to lowest
dept_by_salary = Window(
    partition_by=[col("department")],
    order_by=[col("salary").desc()],
)

# Windowed columns are built up front so the select() below stays flat
position_in_dept = row_number().over(dept_by_salary).alias("rank_in_dept")
salary_over_dept = col("salary").sum().over(dept_by_salary).alias("dept_total_salary")

df.select(
    col("name"),
    col("department"),
    col("salary"),
    position_in_dept,
    salary_over_dept,
)

Expression Visitor Pattern

class ExpressionVisitor:
    """
    Visitor pattern for traversing expression trees.

    Every hook receives the Expression being visited and may return any
    value (annotated ``Any``).
    NOTE(review): how ``visit`` dispatches to the ``visit_*`` hooks is not
    shown in this listing -- confirm before subclassing.
    """

    def visit(self, expr: Expression) -> Any:
        """Visit expression node."""

    def visit_column(self, expr: Expression) -> Any:
        """Visit column reference."""

    def visit_literal(self, expr: Expression) -> Any:
        """Visit literal value."""

    def visit_function(self, expr: Expression) -> Any:
        """Visit function call."""

class ExpressionsProjection:
    """Collection of expressions for projection operations."""

    # Constructed from a list of Expression objects.
    def __init__(self, exprs: List[Expression]): ...

    def to_list(self) -> List[Expression]:
        """
        Convert to list of expressions.

        Returns:
        List[Expression]: The expressions as a plain Python list
        """

Types

# A column argument may be given either by name (str) or as an Expression;
# used as the parameter type of the columns_* functions.
ColumnInputType = Union[str, Expression]

Install with Tessl CLI

npx tessl i tessl/pypi-daft

docs

ai-ml.md

catalog.md

data-io.md

dataframe-operations.md

expressions.md

index.md

session.md

sql.md

udf.md

tile.json