Orange, a component-based data mining framework.
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Orange3's data handling system provides comprehensive functionality for loading, manipulating, and transforming datasets. The core data structure is the Table class, which combines data storage with metadata through the Domain system.
The Table class is Orange3's primary data structure, representing datasets with features, target variables, and metadata.
class Table:
    """
    Data table with Orange-specific data structures.

    Use factory methods for creating Table instances:

    - Table.from_file() for loading from files
    - Table.from_domain() for creating empty tables
    - Table.from_table() for transforming existing tables
    - Table.from_numpy() for creating from arrays

    This is a signature outline; method bodies are not shown here.
    """

    @classmethod
    def from_file(cls, filename, **kwargs):
        """Load table from file (recommended way to load data)."""

    @classmethod
    def from_domain(cls, domain, n_rows=0, weights=False):
        """Create empty table with given domain and number of rows."""

    @classmethod
    def from_table(cls, domain, source, row_indices=...):
        """Create table from selected columns/rows of existing table."""

    @classmethod
    def from_numpy(cls, domain, X, Y=None, metas=None, **kwargs):
        """Create table from numpy arrays."""

    @classmethod
    def from_url(cls, url, **kwargs):
        """Load table from URL."""

    def save(self, filename):
        """Save table to file."""

    def copy(self):
        """Create a copy of the table."""

    def transform(self, domain):
        """Transform table to match new domain."""

    def select_rows(self, row_indices):
        """Select specific rows by indices."""

    def get_column_view(self, column):
        """Get column data as numpy array."""

    @property
    def X(self):
        """Feature data as numpy array."""

    @property
    def Y(self):
        """Target data as numpy array."""

    @property
    def metas(self):
        """Meta attribute data."""

    @property
    def domain(self):
        """Domain defining table structure."""


# Domain objects define the structure and metadata of datasets, including variable types and relationships.
class Domain:
    """
    Dataset structure definition.

    This is a signature outline; method bodies are not shown here.

    Args:
        attributes: List of feature variables
        class_vars: List of target variables
        metas: List of meta variables
    """

    def __init__(self, attributes, class_vars=None, metas=None): ...

    def select_columns(self, columns):
        """Create new domain with selected columns."""

    @property
    def variables(self):
        """All variables in the domain."""

    @property
    def attributes(self):
        """Feature variables."""

    @property
    def class_vars(self):
        """Target variables."""

    @property
    def metas(self):
        """Meta variables."""


# Orange3 supports different variable types for various data formats and analysis needs.
class Variable:
    """Base class for all variable types.

    This is a signature outline; method bodies are not shown here.
    """

    def __init__(self, name="", compute_value=None): ...

    @property
    def name(self):
        """Variable name."""

    def copy(self, compute_value=None):
        """Create copy of variable."""
class ContinuousVariable(Variable):
    """Numeric variable for continuous values.

    This is a signature outline; the constructor body is not shown here.
    """

    def __init__(self, name="", number_of_decimals=None, compute_value=None, *, sparse=False): ...
class DiscreteVariable(Variable):
    """Categorical variable with finite set of values.

    This is a signature outline; method bodies are not shown here.
    """

    def __init__(self, name="", values=(), ordered=False, compute_value=None, *, sparse=False): ...

    @property
    def values(self):
        """List of possible categorical values."""
class StringVariable(Variable):
    """Text-based variable.

    This is a signature outline; the constructor body is not shown here.
    """

    def __init__(self, name): ...
class TimeVariable(ContinuousVariable):
    """Time/datetime variable.

    This is a signature outline; the constructor body is not shown here.
    """

    def __init__(self, name, have_date=False, have_time=False): ...


# Support for various file formats and data sources.
class FileFormat:
    """Base class for file format handlers.

    This is a signature outline; method bodies are not shown here.
    """

    # Both class attributes are empty in this base class.
    EXTENSIONS = ()
    DESCRIPTION = ""

    @classmethod
    def read(cls, filename):
        """Read data from file."""

    @classmethod
    def write(cls, filename, data):
        """Write data to file."""
def get_sample_datasets_dir():
    """
    Get path to Orange's sample datasets directory.

    Returns:
        str: Path to datasets directory
    """


# Comprehensive filtering system for data selection and manipulation.
class Values:
    """Filter data based on variable values.

    This is a signature outline; method bodies are not shown here.
    """

    def __init__(self, conditions): ...

    def __call__(self, data):
        """Apply filter to data."""
class Random:
    """Random sampling filter.

    This is a signature outline; method bodies are not shown here.
    """

    def __init__(self, prob=0.5, stratified=False): ...

    def __call__(self, data):
        """Apply random sampling."""
class IsDefined:
    """Filter rows with defined (non-missing) values.

    This is a signature outline; method bodies are not shown here.
    """

    def __init__(self, columns=None, negate=False): ...

    def __call__(self, data):
        """Filter defined values."""
class SameValue:
    """Filter rows where column has same value.

    This is a signature outline; method bodies are not shown here.
    """

    def __init__(self, column, value): ...

    def __call__(self, data):
        """Apply same value filter."""


# Integration with popular data science libraries.
def table_from_frame(df, *, force_nominal=False, **kwargs):
    """
    Convert pandas DataFrame to Orange Table.

    Args:
        df: pandas DataFrame
        force_nominal: Force string variables to be nominal

    Returns:
        Table: Converted Orange table
    """
def table_to_frame(table, include_metas=True):
    """
    Convert Orange Table to pandas DataFrame.

    Args:
        table: Orange Table
        include_metas: Include meta attributes

    Returns:
        DataFrame: Converted pandas DataFrame
    """


# Load data from file
# The names used below must be imported before use; the original snippet
# referenced Table, table_to_frame and table_from_frame without importing them.
from Orange.data import Table
from Orange.data.pandas_compat import table_from_frame, table_to_frame

data = Table("iris.tab")
print(f"Dataset shape: {data.X.shape}")
print(f"Features: {[var.name for var in data.domain.attributes]}")

# Create custom domain
from Orange.data import ContinuousVariable, DiscreteVariable, Domain
age = ContinuousVariable("age")
gender = DiscreteVariable("gender", values=["M", "F"])
income = ContinuousVariable("income")
domain = Domain([age, income], gender)

# Create table from arrays
import numpy as np
X = np.random.rand(100, 2)
Y = np.random.choice([0, 1], 100)
custom_data = Table.from_numpy(domain, X, Y)

# Filter data
from Orange.data.filter import FilterContinuous, Values
# Values combines per-column conditions; a numeric comparison is expressed
# with FilterContinuous and one of its operator constants.
filtered_data = Values([
    FilterContinuous(data.domain["sepal length"],
                     FilterContinuous.GreaterEqual, 5.0)
])(data)

# Convert to/from pandas
import pandas as pd
df = table_to_frame(data)
back_to_table = table_from_frame(df)

# Install with Tessl CLI
npx tessl i tessl/pypi-orange3