CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-polars

Blazingly fast DataFrame library for Python with lazy and eager evaluation modes

Pending
Overview
Eval results
Files

docs/data-conversion.md

Data Conversion

Seamless integration with pandas, NumPy, PyArrow, and PyTorch through conversion functions supporting bidirectional data exchange with automatic schema mapping and optimized memory transfer.

Capabilities

From External Libraries

Convert data from popular Python data libraries into Polars DataFrames.

def from_pandas(df, *, schema_overrides=None, rechunk=True, nan_to_null=True, include_index=False) -> DataFrame:
    """
    Convert pandas DataFrame to Polars DataFrame.
    
    Parameters:
    - df: pandas DataFrame
    - schema_overrides: Override column types
    - rechunk: Rechunk to contiguous memory
    - nan_to_null: Convert NaN to null values
    - include_index: Include pandas index as column
    
    Returns:
    Polars DataFrame
    """

def from_numpy(data, schema=None, *, orient=None) -> DataFrame:
    """
    Convert NumPy array to Polars DataFrame.
    
    Parameters:
    - data: NumPy array (1D or 2D)
    - schema: Column names and types
    - orient: Data orientation ('col' or 'row')
    
    Returns:
    Polars DataFrame
    """

def from_arrow(data, *, schema_overrides=None, rechunk=True) -> DataFrame:
    """
    Convert PyArrow Table to Polars DataFrame.
    
    Parameters:
    - data: PyArrow Table or RecordBatch
    - schema_overrides: Override column types
    - rechunk: Rechunk to contiguous memory
    
    Returns:
    Polars DataFrame
    """

def from_torch(tensor, *, schema=None) -> DataFrame:
    """
    Convert PyTorch tensor to Polars DataFrame.
    
    Parameters:
    - tensor: PyTorch tensor
    - schema: Column names and types
    
    Returns:
    Polars DataFrame
    """

From Python Data Structures

Convert native Python data structures into Polars DataFrames.

def from_dict(data, schema=None, *, schema_overrides=None, strict=True, nan_to_null=False) -> DataFrame:
    """
    Convert dictionary to Polars DataFrame.
    
    Parameters:
    - data: Dictionary mapping column names to values
    - schema: Column schema
    - schema_overrides: Override specific column types
    - strict: Strict schema validation
    - nan_to_null: Convert NaN to null values
    
    Returns:
    Polars DataFrame
    """

def from_dicts(dicts, schema=None, *, schema_overrides=None, strict=True, infer_schema_length=100) -> DataFrame:
    """
    Convert list of dictionaries to Polars DataFrame.
    
    Parameters:
    - dicts: List of dictionaries (records)
    - schema: Column schema
    - schema_overrides: Override specific column types
    - strict: Strict schema validation
    - infer_schema_length: Rows to scan for schema inference
    
    Returns:
    Polars DataFrame
    """

def from_records(records, schema=None, *, schema_overrides=None, orient=None, infer_schema_length=100) -> DataFrame:
    """
    Convert records (list of tuples/lists) to Polars DataFrame.
    
    Parameters:
    - records: List of records (tuples or lists)
    - schema: Column schema
    - schema_overrides: Override specific column types
    - orient: Data orientation ('col' or 'row')
    - infer_schema_length: Rows to scan for schema inference
    
    Returns:
    Polars DataFrame
    """

def from_repr(text: str) -> DataFrame:
    """
    Parse DataFrame from string representation.
    
    Parameters:
    - text: String representation of DataFrame
    
    Returns:
    Polars DataFrame
    """

From Generic DataFrame Types

Convert from other DataFrame implementations with automatic protocol detection.

def from_dataframe(df, *, allow_copy=True) -> DataFrame:
    """
    Convert DataFrame interchange object to Polars DataFrame.
    
    Parameters:
    - df: DataFrame implementing interchange protocol
    - allow_copy: Allow copying data if necessary
    
    Returns:
    Polars DataFrame
    """

JSON Normalization

Flatten nested JSON data into tabular format.

def json_normalize(data, *, separator=".", max_level=None) -> DataFrame:
    """
    Normalize nested JSON data into flat DataFrame.
    
    Parameters:
    - data: JSON data (dict, list of dicts, or JSON string)
    - separator: Separator for nested field names
    - max_level: Maximum nesting level to flatten
    
    Returns:
    Normalized DataFrame
    """

Usage Examples

From Pandas

import polars as pl
import pandas as pd
import numpy as np

# Convert pandas DataFrame
pdf = pd.DataFrame({
    'A': [1, 2, 3, np.nan],
    'B': ['a', 'b', 'c', 'd'],
    'C': pd.date_range('2023-01-01', periods=4)
})

# Basic conversion
df = pl.from_pandas(pdf)

# Conversion with options
df = pl.from_pandas(
    pdf,
    schema_overrides={'A': pl.Int32},
    include_index=True,
    nan_to_null=True
)

From NumPy

# 2D array to DataFrame
arr = np.random.rand(5, 3)
df = pl.from_numpy(
    arr,
    schema=['col1', 'col2', 'col3'],
    orient='row'
)

# 1D array to single-column DataFrame
arr_1d = np.array([1, 2, 3, 4, 5])
df = pl.from_numpy(arr_1d, schema=['values'])

From Python Dictionaries

# Dictionary with lists
data = {
    'name': ['Alice', 'Bob', 'Charlie'],
    'age': [25, 30, 35],
    'city': ['NYC', 'LA', 'Chicago']
}
df = pl.from_dict(data)

# List of dictionaries (records)
records = [
    {'name': 'Alice', 'age': 25, 'city': 'NYC'},
    {'name': 'Bob', 'age': 30, 'city': 'LA'},
    {'name': 'Charlie', 'age': 35, 'city': 'Chicago'}
]
df = pl.from_dicts(records)

# List of tuples/lists
tuples = [
    ('Alice', 25, 'NYC'),
    ('Bob', 30, 'LA'),
    ('Charlie', 35, 'Chicago')
]
df = pl.from_records(
    tuples,
    schema=['name', 'age', 'city']
)

From PyArrow

import pyarrow as pa

# Create PyArrow table
arrow_table = pa.table({
    'integers': [1, 2, 3, 4],
    'floats': [1.1, 2.2, 3.3, 4.4],
    'strings': ['a', 'b', 'c', 'd']
})

# Convert to Polars
df = pl.from_arrow(arrow_table)

# With schema overrides
df = pl.from_arrow(
    arrow_table,
    schema_overrides={'integers': pl.Int32}
)

JSON Normalization

# Nested JSON data
json_data = [
    {
        'name': 'Alice',
        'address': {
            'street': '123 Main St',
            'city': 'NYC',
            'coordinates': {'lat': 40.7, 'lon': -74.0}
        },
        'hobbies': ['reading', 'swimming']
    },
    {
        'name': 'Bob',
        'address': {
            'street': '456 Oak Ave',
            'city': 'LA',
            'coordinates': {'lat': 34.0, 'lon': -118.2}
        },
        'hobbies': ['cycling', 'cooking', 'gaming']
    }
]

# Normalize nested structure
df = pl.json_normalize(
    json_data,
    separator='_',
    max_level=2
)

Integration with ML Libraries

# From PyTorch tensor
import torch

tensor = torch.randn(100, 5)
df = pl.from_torch(
    tensor,
    schema=['feature_1', 'feature_2', 'feature_3', 'feature_4', 'feature_5']
)

# Convert back to tensor for ML
tensor_back = torch.from_numpy(df.to_numpy())

Bidirectional Conversion

# Polars -> Pandas -> Polars
original_df = pl.DataFrame({
    'a': [1, 2, 3],
    'b': ['x', 'y', 'z']
})

# Convert to pandas
pandas_df = original_df.to_pandas()

# Convert back to polars
restored_df = pl.from_pandas(pandas_df)

# Polars -> Arrow -> Polars
arrow_table = original_df.to_arrow()
restored_df = pl.from_arrow(arrow_table)

Complex Schema Handling

# Mixed data types with schema overrides
mixed_data = {
    'ids': [1, 2, 3, 4],
    'values': [1.1, 2.2, 3.3, 4.4],
    'categories': ['A', 'B', 'A', 'C'],
    'timestamps': ['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04']
}

df = pl.from_dict(
    mixed_data,
    schema_overrides={
        'ids': pl.Int32,
        'categories': pl.Categorical,
        'timestamps': pl.Datetime
    }
)

Install with Tessl CLI

npx tessl i tessl/pypi-polars

docs

column-selection.md

configuration.md

core-data-structures.md

data-conversion.md

data-types.md

error-handling.md

functions-expressions.md

index.md

io-operations.md

sql-interface.md

tile.json