Orange, a component-based data mining framework.
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Orange3 provides techniques for reducing data dimensionality and creating low-dimensional representations for visualization and analysis.
Linear dimensionality reduction using SVD.
class PCA:
    """
    Principal Component Analysis.

    Signature-only stub: method bodies are intentionally omitted.

    Args:
        n_components: Number of components to keep.
        copy: Whether to copy data.
        whiten: Whether to whiten components.
        svd_solver: SVD solver algorithm.
        random_state: Random seed.
    """

    def __init__(self, n_components=None, copy=True, whiten=False,
                 svd_solver='auto', random_state=None):
        ...

    def __call__(self, data):
        """
        Apply PCA transformation to data.

        Args:
            data: Orange Table.

        Returns:
            Table with transformed data.
        """

    @property
    def components_(self):
        """Principal axes in feature space."""

    @property
    def explained_variance_ratio_(self):
        """Percentage of variance explained by each component."""
class SparsePCA:
    """Sparse Principal Component Analysis.

    Signature-only stub: method bodies are intentionally omitted.
    """

    def __init__(self, n_components=None, alpha=1, ridge_alpha=0.01):
        ...

    def __call__(self, data):
        """Apply sparse PCA transformation."""
class IncrementalPCA:
    """Incremental PCA for large datasets.

    Signature-only stub: method bodies are intentionally omitted.
    """

    def __init__(self, n_components=None, whiten=False, copy=True,
                 batch_size=None):
        ...

    def __call__(self, data):
        """Apply incremental PCA transformation."""


# Supervised dimensionality reduction for classification.
class LDA:
    """
    Linear Discriminant Analysis.

    Signature-only stub: method bodies are intentionally omitted.

    Args:
        n_components: Number of components.
        solver: Solver algorithm ('svd', 'lsqr', 'eigen').
        shrinkage: Shrinkage parameter for covariance estimation.
    """

    def __init__(self, n_components=None, solver='svd', shrinkage=None):
        ...

    def __call__(self, data):
        """
        Apply LDA transformation to data.

        Args:
            data: Orange Table with class labels.

        Returns:
            Table with transformed data.
        """

    @property
    def scalings_(self):
        """Scaling matrix."""

    @property
    def explained_variance_ratio_(self):
        """Percentage of variance explained by each component."""


# SVD-based dimensionality reduction techniques.
class TruncatedSVD:
    """
    Truncated Singular Value Decomposition.

    Signature-only stub: method bodies are intentionally omitted.

    Args:
        n_components: Number of components.
        algorithm: SVD algorithm ('arpack', 'randomized').
        n_iter: Number of iterations for randomized solver.
        random_state: Random seed.
    """

    def __init__(self, n_components=2, algorithm='randomized', n_iter=5,
                 random_state=None):
        ...

    def __call__(self, data):
        """Apply truncated SVD transformation."""

    @property
    def explained_variance_ratio_(self):
        """Percentage of variance explained by each component."""
class CUR:
    """
    CUR matrix decomposition.

    Signature-only stub: method bodies are intentionally omitted.

    Args:
        rank: Target rank of decomposition.
        max_error: Maximum reconstruction error.
    """

    def __init__(self, rank=3, max_error=1):
        ...

    def __call__(self, data):
        """Apply CUR decomposition."""


# Specialized projections for data visualization.
class FreeViz:
    """
    FreeViz projection for visualization.

    Signature-only stub: the method body is intentionally omitted.
    """

    def __call__(self, data):
        """
        Apply FreeViz transformation.

        Args:
            data: Orange Table with class labels.

        Returns:
            Table with 2D projection.
        """
class RadViz:
    """
    RadViz (Radial Visualization) projection.

    Signature-only stub: the method body is intentionally omitted.
    """

    def __call__(self, data):
        """
        Apply RadViz transformation.

        Args:
            data: Orange Table with class labels.

        Returns:
            Table with 2D projection.
        """


# Foundation classes for projection algorithms.
class Projector:
    """Base class for all projection methods.

    Signature-only stub: method bodies are intentionally omitted.
    """

    def __call__(self, data):
        """Apply projection to data."""

    def transform(self, data):
        """Transform data using fitted projector."""
class Projection:
    """Container for projection results.

    Signature-only stub: method bodies are intentionally omitted.
    """

    def __init__(self, proj_data, projection):
        ...

    @property
    def data(self):
        """Transformed data."""

    @property
    def projection(self):
        """Projection object."""
class SklProjector(Projector):
    """Wrapper for scikit-learn projection methods.

    Signature-only stub: the constructor body is intentionally omitted.
    """

    def __init__(self, skl_proj):
        ...


# Non-linear dimensionality reduction techniques.
# Note: these classes are available through Orange.projection.manifold
class MDS:
    """Multidimensional Scaling.

    Signature-only stub: method bodies are intentionally omitted.
    """

    def __init__(self, n_components=2, metric=True,
                 dissimilarity='euclidean'):
        ...

    def __call__(self, data):
        """Apply MDS transformation."""
class Isomap:
    """Isomap embedding.

    Signature-only stub: method bodies are intentionally omitted.
    """

    def __init__(self, n_components=2, n_neighbors=5):
        ...

    def __call__(self, data):
        """Apply Isomap transformation."""
class LocallyLinearEmbedding:
    """Locally Linear Embedding.

    Signature-only stub: method bodies are intentionally omitted.
    """

    def __init__(self, n_components=2, n_neighbors=5, method='standard'):
        ...

    def __call__(self, data):
        """Apply LLE transformation."""
class TSNE:
    """t-distributed Stochastic Neighbor Embedding.

    Signature-only stub: method bodies are intentionally omitted.
    """

    def __init__(self, n_components=2, perplexity=30.0, learning_rate=200.0):
        ...

    def __call__(self, data):
        """Apply t-SNE transformation."""


# Basic projection workflow
# Usage examples for the projection classes, one section per family of
# methods. Every section operates on the same `data` table loaded below.
#
# NOTE(review): in the Orange3 library a projector call such as pca(data) is
# documented as returning a fitted model that is then applied to the data
# (model = pca(data); transformed = model(data)). The examples below treat
# the call result as the transformed table and read fitted attributes from
# the projector itself — confirm against the Orange documentation.
from Orange.data import Table
from Orange.projection import PCA, LDA, FreeViz

# Load data
data = Table("iris")

# Principal Component Analysis
pca = PCA(n_components=2)
pca_data = pca(data)
print(f"Original data shape: {data.X.shape}")
print(f"PCA data shape: {pca_data.X.shape}")
print(f"Explained variance ratio: {pca.explained_variance_ratio_}")

# Linear Discriminant Analysis (requires class labels)
lda = LDA(n_components=2)
lda_data = lda(data)
print(f"LDA data shape: {lda_data.X.shape}")
print(f"LDA explained variance: {lda.explained_variance_ratio_}")

# Visualization projections
freeviz = FreeViz()
freeviz_data = freeviz(data)
print(f"FreeViz projection shape: {freeviz_data.X.shape}")

# Different PCA variants
from Orange.projection import SparsePCA, IncrementalPCA

sparse_pca = SparsePCA(n_components=2, alpha=0.1)
sparse_data = sparse_pca(data)
incremental_pca = IncrementalPCA(n_components=2)
incremental_data = incremental_pca(data)

# SVD-based methods
from Orange.projection import TruncatedSVD, CUR

svd = TruncatedSVD(n_components=2)
svd_data = svd(data)
cur = CUR(rank=2)
cur_data = cur(data)

# Manifold learning examples
from Orange.projection.manifold import MDS, TSNE, Isomap

# Multidimensional Scaling
mds = MDS(n_components=2, metric=True)
mds_data = mds(data)

# t-SNE (computationally intensive)
tsne = TSNE(n_components=2, perplexity=30, learning_rate=200)
tsne_data = tsne(data)

# Isomap
isomap = Isomap(n_components=2, n_neighbors=5)
isomap_data = isomap(data)

print(f"MDS projection shape: {mds_data.X.shape}")
print(f"t-SNE projection shape: {tsne_data.X.shape}")
print(f"Isomap projection shape: {isomap_data.X.shape}")

# Analyze projection quality
import numpy as np

# For PCA, check cumulative explained variance
cumulative_variance = np.cumsum(pca.explained_variance_ratio_)
print(f"Cumulative variance explained: {cumulative_variance}")

# Compare different numbers of components
for n_comp in [1, 2, 3, 4]:
    pca_n = PCA(n_components=n_comp)
    pca_n_data = pca_n(data)
    total_variance = np.sum(pca_n.explained_variance_ratio_)
    print(f"PCA with {n_comp} components explains {total_variance:.3f} of variance")

# Install with Tessl CLI
npx tessl i tessl/pypi-orange3