CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-orange3

Orange, a component-based data mining framework.

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview
Eval results
Files

docs/data-handling.md

Data Handling and I/O

Orange3's data handling system provides comprehensive functionality for loading, manipulating, and transforming datasets. The core data structure is the Table class, which combines data storage with metadata through the Domain system.

Capabilities

Table Operations

The Table class is Orange3's primary data structure, representing datasets with features, target variables, and metadata.

class Table:
    """
    Data table with Orange-specific data structures.
    
    Use factory methods for creating Table instances:
    - Table.from_file() for loading from files  
    - Table.from_url() for loading from a URL
    - Table.from_domain() for creating empty tables
    - Table.from_table() for transforming existing tables
    - Table.from_numpy() for creating from arrays

    This listing is an API summary: it shows signatures and one-line
    descriptions only, not implementations.
    """
    @classmethod
    def from_file(cls, filename, **kwargs):
        """Load table from file (recommended way to load data).

        Args:
            filename: File to load the table from.
            **kwargs: Extra options; not described in this summary.
        """
        
    @classmethod
    def from_domain(cls, domain, n_rows=0, weights=False):
        """Create empty table with given domain and number of rows.

        Args:
            domain: Domain defining the structure of the new table.
            n_rows: Number of rows to create (default 0).
            weights: Defaults to False; presumably controls whether
                instance weights are allocated -- TODO confirm.
        """
        
    @classmethod  
    def from_table(cls, domain, source, row_indices=...):
        """Create table from selected columns/rows of existing table.

        Args:
            domain: Domain selecting the columns of the new table.
            source: Existing table to take the data from.
            row_indices: Rows to take. The default is the Ellipsis
                literal (``...``), presumably meaning "all rows" --
                TODO confirm.
        """
        
    @classmethod
    def from_numpy(cls, domain, X, Y=None, metas=None, **kwargs):
        """Create table from numpy arrays.

        Args:
            domain: Domain describing the columns of the arrays.
            X: Array of feature data.
            Y: Optional array of target data.
            metas: Optional array of meta attribute data.
            **kwargs: Extra options; not described in this summary.
        """
    
    @classmethod
    def from_url(cls, url, **kwargs):
        """Load table from URL.

        Args:
            url: Location to load the table from.
            **kwargs: Extra options; not described in this summary.
        """
    
    def save(self, filename):
        """Save table to file.

        Args:
            filename: File to write the table to.
        """
        
    def copy(self):
        """Create a copy of the table."""
        
    def transform(self, domain):
        """Transform table to match new domain.

        Args:
            domain: Domain the resulting table should have.
        """
        
    def select_rows(self, row_indices):
        """Select specific rows by indices.

        NOTE(review): verify that this method exists on ``Table`` in the
        installed Orange version before relying on it.

        Args:
            row_indices: Indices of the rows to select.
        """
        
    def get_column_view(self, column):
        """Get column data as numpy array.

        Args:
            column: Column to fetch.
        """
        
    @property
    def X(self):
        """Feature data as numpy array."""
        
    @property
    def Y(self):
        """Target data as numpy array."""
        
    @property
    def metas(self):
        """Meta attribute data."""
        
    @property
    def domain(self):
        """Domain defining table structure."""

Domain Management

Domain objects define the structure and metadata of datasets, including variable types and relationships.

class Domain:
    """
    Dataset structure definition.

    A domain groups the variables of a dataset into three roles:
    features (attributes), targets (class_vars) and metas.
    
    Args:
        attributes: List of feature variables
        class_vars: List of target variables (optional, default None)
        metas: List of meta variables (optional, default None)
    """
    # Signature only; the implementation is not part of this summary.
    def __init__(self, attributes, class_vars=None, metas=None): ...
    
    def select_columns(self, columns):
        """Create new domain with selected columns.

        Args:
            columns: Columns to keep in the new domain.
        """
        
    @property
    def variables(self):
        """All variables in the domain."""
        
    @property
    def attributes(self):
        """Feature variables."""
        
    @property
    def class_vars(self):
        """Target variables."""
        
    @property
    def metas(self):
        """Meta variables."""

Variable Types

Orange3 supports different variable types for various data formats and analysis needs.

class Variable:
    """Base class for all variable types.

    Args:
        name: Variable name (default is the empty string).
        compute_value: Optional, default None. Presumably a callable used
            to derive this variable's values from other data -- TODO confirm.
    """
    # Signature only; the implementation is not part of this summary.
    def __init__(self, name="", compute_value=None): ...
    
    @property
    def name(self):
        """Variable name."""
        
    def copy(self, compute_value=None):
        """Create copy of variable.

        Args:
            compute_value: Optional ``compute_value`` for the copy
                (default None).
        """

class ContinuousVariable(Variable):
    """Numeric variable for continuous values.

    Args:
        name: Variable name (default is the empty string).
        number_of_decimals: Optional, default None. Presumably the number
            of decimals used when formatting values -- TODO confirm.
        compute_value: Optional, default None; see ``Variable``.
        sparse: Keyword-only flag, default False.
    """
    # Signature only; the implementation is not part of this summary.
    def __init__(self, name="", number_of_decimals=None, compute_value=None, *, sparse=False): ...

class DiscreteVariable(Variable):
    """Categorical variable with finite set of values.

    Args:
        name: Variable name (default is the empty string).
        values: Possible categorical values (default is an empty tuple).
        ordered: Defaults to False.
            NOTE(review): check whether the installed Orange version still
            accepts ``ordered`` -- TODO confirm.
        compute_value: Optional, default None; see ``Variable``.
        sparse: Keyword-only flag, default False.
    """
    # Signature only; the implementation is not part of this summary.
    def __init__(self, name="", values=(), ordered=False, compute_value=None, *, sparse=False): ...
    
    @property
    def values(self):
        """List of possible categorical values."""

class StringVariable(Variable):
    """Text-based variable.

    Args:
        name: Variable name. Unlike the ``Variable`` base signature shown
            on this page, no default is given here.
    """
    # Signature only; the implementation is not part of this summary.
    def __init__(self, name): ...

class TimeVariable(ContinuousVariable):
    """Time/datetime variable.

    Args:
        name: Variable name.
        have_date: Defaults to False; presumably whether values carry a
            date component -- TODO confirm.
        have_time: Defaults to False; presumably whether values carry a
            time-of-day component -- TODO confirm.
    """
    # Signature only; the implementation is not part of this summary.
    def __init__(self, name, have_date=False, have_time=False): ...

File I/O Operations

Support for various file formats and data sources.

class FileFormat:
    """Base class for file format handlers."""
    # Empty by default here; presumably subclasses list the file
    # extensions they handle -- TODO confirm.
    EXTENSIONS = ()
    # Empty by default here; presumably a human-readable format name
    # supplied by subclasses -- TODO confirm.
    DESCRIPTION = ""
    
    @classmethod
    def read(cls, filename):
        """Read data from file.

        NOTE(review): shown as a classmethod taking ``filename``; verify
        against the installed Orange version.

        Args:
            filename: File to read from.
        """
        
    @classmethod
    def write(cls, filename, data):
        """Write data to file.

        Args:
            filename: File to write to.
            data: Data to write.
        """

def get_sample_datasets_dir():
    """
    Get path to Orange's sample datasets directory.

    Takes no arguments.
    
    Returns:
        str: Path to datasets directory
    """

Data Filtering

Comprehensive filtering system for data selection and manipulation.

class Values:
    """Filter data based on variable values.

    An instance is callable: build it from a list of conditions, then
    call it on the data to filter.

    Args:
        conditions: Conditions the filter applies. How multiple
            conditions are combined is not stated in this summary.
    """
    # Signature only; the implementation is not part of this summary.
    def __init__(self, conditions): ...
    
    def __call__(self, data):
        """Apply filter to data.

        Args:
            data: Data to filter.
        """

class Random:
    """Random sampling filter.

    Args:
        prob: Defaults to 0.5; presumably the selection probability or
            proportion -- TODO confirm.
        stratified: Defaults to False.
            NOTE(review): verify both parameters and their defaults
            against ``Orange.data.filter.Random`` in the installed Orange
            version -- TODO confirm.
    """
    # Signature only; the implementation is not part of this summary.
    def __init__(self, prob=0.5, stratified=False): ...
    
    def __call__(self, data):
        """Apply random sampling.

        Args:
            data: Data to sample from.
        """

class IsDefined:
    """Filter rows with defined (non-missing) values.

    Args:
        columns: Optional, default None. Presumably None means that all
            columns are checked -- TODO confirm.
        negate: Defaults to False; presumably inverts the selection --
            TODO confirm.
    """
    # Signature only; the implementation is not part of this summary.
    def __init__(self, columns=None, negate=False): ...
    
    def __call__(self, data):
        """Filter defined values.

        Args:
            data: Data to filter.
        """

class SameValue:
    """Filter rows where column has same value.

    Args:
        column: Column whose value is compared.
        value: Value the column is compared against.
    """
    # Signature only; the implementation is not part of this summary.
    def __init__(self, column, value): ...
    
    def __call__(self, data):
        """Apply same value filter.

        Args:
            data: Data to filter.
        """

Data Conversion and Compatibility

Integration with popular data science libraries.

def table_from_frame(df, *, force_nominal=False, **kwargs):
    """
    Convert pandas DataFrame to Orange Table.
    
    Args:
        df: pandas DataFrame
        force_nominal: Force string variables to be nominal
            (keyword-only, default False)
        **kwargs: Extra options; not described in this summary.
            NOTE(review): verify that the installed Orange version
            accepts extra keyword arguments here.
        
    Returns:
        Table: Converted Orange table
    """

def table_to_frame(table, include_metas=True):
    """
    Convert Orange Table to pandas DataFrame.
    
    Args:
        table: Orange Table
        include_metas: Include meta attributes (shown here with default
            True).
            NOTE(review): verify the parameter names and the default of
            ``include_metas`` against ``Orange.data.pandas_compat`` in
            the installed Orange version -- TODO confirm.
        
    Returns:
        DataFrame: Converted pandas DataFrame
    """

Usage Examples

# Usage example: load, build, filter, and convert Orange tables.
# Every name used below is imported here so the snippet runs on its own.
# The pandas conversion at the end requires pandas to be installed.
import numpy as np

from Orange.data import ContinuousVariable, DiscreteVariable, Domain, Table
from Orange.data.filter import FilterContinuous, Values
from Orange.data.pandas_compat import table_from_frame, table_to_frame

# Load data from file
data = Table("iris.tab")
print(f"Dataset shape: {data.X.shape}")
print(f"Features: {[var.name for var in data.domain.attributes]}")

# Create custom domain: two numeric features and one categorical target
age = ContinuousVariable("age")
gender = DiscreteVariable("gender", values=["M", "F"])
income = ContinuousVariable("income")
domain = Domain([age, income], gender)

# Create table from arrays; Y holds indices into gender.values
X = np.random.rand(100, 2)
Y = np.random.choice([0, 1], 100)
custom_data = Table.from_numpy(domain, X, Y)

# Filter data: keep rows whose "sepal length" is at least 5.0.
# Conditions are FilterContinuous/FilterDiscrete/... objects wrapped in
# a Values filter; Values itself has no comparison helpers such as GE.
filtered_data = Values([
    FilterContinuous("sepal length", FilterContinuous.GreaterEqual, 5.0)
])(data)

# Convert to/from pandas
df = table_to_frame(data)
back_to_table = table_from_frame(df)

Install with Tessl CLI

npx tessl i tessl/pypi-orange3

docs

classification.md

clustering.md

data-handling.md

distance.md

evaluation.md

index.md

preprocessing.md

projection.md

regression.md

widgets.md

tile.json