Orange, a component-based data mining framework.
npx @tessl/cli install tessl/pypi-orange3@3.39.0

Orange3 is a comprehensive visual data mining and machine learning framework designed for both novice and expert users. It provides an intuitive drag-and-drop workflow interface while offering powerful programmatic APIs for data analysis, machine learning, and visualization. The framework includes extensive capabilities for data input/output, preprocessing, classification, regression, clustering, evaluation, and visualization.
pip install Orange3

import Orange

Common imports for working with data and machine learning:
from Orange.data import Table, Domain, Variable, ContinuousVariable, DiscreteVariable, StringVariable
from Orange.classification import LogisticRegressionLearner, TreeLearner, RandomForestLearner, NaiveBayesLearner
from Orange.regression import LinearRegressionLearner, RidgeRegressionLearner, LassoRegressionLearner, ElasticNetLearner, SGDRegressionLearner
from Orange.clustering import KMeans, DBSCAN, HierarchicalClustering
from Orange.evaluation import CrossValidation, CA, AUC, MSE, RMSE
from Orange.preprocess import Discretize, Impute, Normalizer, SelectBestFeatures
from Orange.projection import PCA, LDA

import Orange
from Orange.data import Table
from Orange.classification import TreeLearner
from Orange.evaluation import CrossValidation, CA
# Load a dataset (using recommended factory method)
data = Table.from_file("iris")
# Create a learner
learner = TreeLearner()
# Evaluate using cross-validation
results = CrossValidation(data, [learner], k=5)
accuracy = CA(results)
print(f"Accuracy: {accuracy[0]:.3f}")
# Train a model on full dataset
model = learner(data)
# Make predictions on new data
predictions = model(data[:5])
print(f"Predictions: {predictions}")

Orange3 uses a modular, lazy-loading architecture with several key components:
The framework supports both programmatic usage and visual workflow construction, making it accessible to users with varying levels of programming experience while maintaining the power needed for advanced data science tasks.
Core data structures and operations for loading, manipulating, and transforming datasets. Includes support for various file formats, missing value handling, and domain management.
class Table:
@classmethod
def from_domain(cls, domain, n_rows=0, weights=False): ...
@classmethod
def from_table(cls, domain, source, row_indices=...): ...
@classmethod
def from_file(cls, filename, **kwargs): ...
@classmethod
def from_numpy(cls, domain, X, Y=None, metas=None, **kwargs): ...
@classmethod
def from_url(cls, url, **kwargs): ...
def save(self, filename): ...
def copy(self): ...
def transform(self, domain): ...
class Domain:
def __init__(self, attributes, class_vars=None, metas=None): ...
class Variable:
def __init__(self, name="", compute_value=None): ...
class ContinuousVariable(Variable):
def __init__(self, name="", number_of_decimals=None, compute_value=None, *, sparse=False): ...
class DiscreteVariable(Variable):
def __init__(self, name="", values=(), ordered=False, compute_value=None, *, sparse=False): ...
class StringVariable(Variable):
def __init__(self, name="", compute_value=None, *, sparse=False): ...

Supervised learning algorithms for categorical prediction tasks, including tree-based methods, probabilistic classifiers, support vector machines, neural networks, and ensemble methods.
class TreeLearner:
def __init__(self, binarize=False, max_depth=None, min_samples_leaf=1,
min_samples_split=2, sufficient_majority=0.95, preprocessors=None): ...
def __call__(self, data): ...
class LogisticRegressionLearner:
def __init__(self, penalty="l2", dual=False, tol=0.0001, C=1.0,
fit_intercept=True, intercept_scaling=1, class_weight=None,
random_state=None, solver="auto", max_iter=100,
multi_class="deprecated", verbose=0, n_jobs=1, preprocessors=None): ...
def __call__(self, data): ...
class RandomForestLearner:
def __init__(self, n_estimators=10, max_depth=None, preprocessors=None): ...
def __call__(self, data): ...
class NaiveBayesLearner:
def __init__(self, preprocessors=None): ...
def __call__(self, data): ...

Supervised learning algorithms for continuous prediction tasks, including linear models, tree-based regression, neural networks, and ensemble methods.
class LinearRegressionLearner:
def __init__(self, preprocessors=None, fit_intercept=True): ...
def __call__(self, data): ...
class RidgeRegressionLearner:
def __init__(self, alpha=1.0, fit_intercept=True, copy_X=True,
max_iter=None, tol=0.001, solver='auto', preprocessors=None): ...
def __call__(self, data): ...
class LassoRegressionLearner:
def __init__(self, alpha=1.0, fit_intercept=True, precompute=False,
copy_X=True, max_iter=1000, tol=0.0001, warm_start=False,
positive=False, preprocessors=None): ...
def __call__(self, data): ...
class ElasticNetLearner:
def __init__(self, alpha=1.0, l1_ratio=0.5, fit_intercept=True,
precompute=False, max_iter=1000, copy_X=True, tol=0.0001,
warm_start=False, positive=False, preprocessors=None): ...
def __call__(self, data): ...
class SGDRegressionLearner:
def __init__(self, loss='squared_error', penalty='l2', alpha=0.0001,
l1_ratio=0.15, fit_intercept=True, max_iter=5, tol=1e-3,
shuffle=True, epsilon=0.1, random_state=None, preprocessors=None): ...
def __call__(self, data): ...
class RandomForestRegressionLearner:
def __init__(self, n_estimators=10, max_depth=None, preprocessors=None): ...
def __call__(self, data): ...

Unsupervised learning algorithms for discovering patterns and structures in data without labeled examples.
class KMeans:
def __init__(self, n_clusters=8, init='k-means++', n_init=10, max_iter=300,
tol=0.0001, random_state=None, preprocessors=None): ...
def __call__(self, data): ...
class DBSCAN:
def __init__(self, eps=0.5, min_samples=5, metric='euclidean',
algorithm='auto', leaf_size=30, p=None, preprocessors=None): ...
def __call__(self, data): ...
class HierarchicalClustering:
def __init__(self, n_clusters=2, linkage='average'): ...
def fit(self, X): ...
def fit_predict(self, X, y=None): ...

Data transformation and preparation techniques including discretization, normalization, imputation, and feature selection.
class Discretize:
def __init__(self, method=None, n_intervals=4): ...
def __call__(self, data): ...
class Impute:
def __init__(self, method=None): ...
def __call__(self, data): ...
class Normalizer:
def __init__(self, norm_type='l2', transform_class=False): ...
def __call__(self, data): ...
class SelectBestFeatures:
def __init__(self, method=None, k=5): ...
def __call__(self, data): ...

Comprehensive model evaluation framework with cross-validation, performance metrics, and statistical testing capabilities.
class CrossValidation:
def __init__(self, data, learners, k=10, stratified=True): ...
class TestOnTestData:
def __init__(self, train_data, test_data, learners): ...
def CA(results): ... # Classification Accuracy
def AUC(results): ... # Area Under Curve
def MSE(results): ... # Mean Squared Error
def RMSE(results): ... # Root Mean Squared Error

Techniques for reducing data dimensionality and creating low-dimensional representations for visualization and analysis.
class PCA:
def __init__(self, n_components=None): ...
def __call__(self, data): ...
class LDA:
def __init__(self, n_components=None): ...
def __call__(self, data): ...
class FreeViz:
def __init__(self): ...
def __call__(self, data): ...

Comprehensive collection of distance and similarity measures for various data types and analysis tasks.
class Euclidean:
def __call__(self, data): ...
class Manhattan:
def __call__(self, data): ...
class Cosine:
def __call__(self, data): ...
class Jaccard:
def __call__(self, data): ...

Widget-based graphical interface components for building data analysis workflows through drag-and-drop operations.
# Widget categories accessible through Orange Canvas:
# - Orange.widgets.data: Data input/output widgets
# - Orange.widgets.visualize: Visualization widgets
# - Orange.widgets.model: Machine learning model widgets
# - Orange.widgets.evaluate: Evaluation widgets
# - Orange.widgets.unsupervised: Clustering widgets