# Dimensionality Reduction and Projection

Orange3 provides techniques for reducing data dimensionality and creating low-dimensional representations for visualization and analysis.

## Capabilities

### Principal Component Analysis

Linear dimensionality reduction using SVD.

```python { .api }
class PCA:
    """
    Principal Component Analysis.

    Args:
        n_components: Number of components to keep
        copy: Whether to copy data
        whiten: Whether to whiten components
        svd_solver: SVD solver algorithm
        random_state: Random seed
    """
    def __init__(self, n_components=None, copy=True, whiten=False,
                 svd_solver='auto', random_state=None): ...

    def __call__(self, data):
        """
        Apply PCA transformation to data.

        Args:
            data: Orange Table

        Returns:
            Table with transformed data
        """

    @property
    def components_(self):
        """Principal axes in feature space."""

    @property
    def explained_variance_ratio_(self):
        """Percentage of variance explained by each component."""


class SparsePCA:
    """Sparse Principal Component Analysis."""
    def __init__(self, n_components=None, alpha=1, ridge_alpha=0.01): ...

    def __call__(self, data):
        """Apply sparse PCA transformation."""


class IncrementalPCA:
    """Incremental PCA for large datasets."""
    def __init__(self, n_components=None, whiten=False, copy=True, batch_size=None): ...

    def __call__(self, data):
        """Apply incremental PCA transformation."""
```
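
A brief sketch of how the loadings in `components_` can be read, assuming the interface above where calling the projector returns the transformed Table; the ranking logic is illustrative only:

```python
# Minimal sketch: inspect PCA loadings to see which original features
# drive each component. Assumes the interface listed above, where
# calling the projector returns a transformed Table and components_
# holds the principal axes (n_components x n_features).
from Orange.data import Table
from Orange.projection import PCA

data = Table("iris")
pca = PCA(n_components=2)
pca_data = pca(data)
print(pca_data.X.shape)                   # (150, 2)

feature_names = [attr.name for attr in data.domain.attributes]
for i, axis in enumerate(pca.components_):
    # Sort features by absolute loading on this component
    ranked = sorted(zip(feature_names, axis), key=lambda t: abs(t[1]), reverse=True)
    print(f"PC{i + 1}:", ranked)
```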

### Linear Discriminant Analysis

Supervised dimensionality reduction for classification.

```python { .api }
class LDA:
    """
    Linear Discriminant Analysis.

    Args:
        n_components: Number of components
        solver: Solver algorithm ('svd', 'lsqr', 'eigen')
        shrinkage: Shrinkage parameter for covariance estimation
    """
    def __init__(self, n_components=None, solver='svd', shrinkage=None): ...

    def __call__(self, data):
        """
        Apply LDA transformation to data.

        Args:
            data: Orange Table with class labels

        Returns:
            Table with transformed data
        """

    @property
    def scalings_(self):
        """Scaling matrix."""

    @property
    def explained_variance_ratio_(self):
        """Percentage of variance explained by each component."""
```
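
A small sketch under the same assumptions, and assuming the `shrinkage` argument is passed through to scikit-learn, where it only takes effect with the `'lsqr'` or `'eigen'` solvers; LDA produces at most `n_classes - 1` components:

```python
# Sketch, following the LDA interface above. shrinkage is only used by
# the 'lsqr' and 'eigen' solvers (a scikit-learn constraint); the
# default 'svd' solver ignores it. With 3 classes, at most 2 components.
from Orange.data import Table
from Orange.projection import LDA

data = Table("iris")                       # labelled data is required
lda = LDA(n_components=2, solver='eigen', shrinkage='auto')
lda_data = lda(data)

print(lda_data.X.shape)                    # (150, 2)
print(lda.explained_variance_ratio_)
```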

### Matrix Decomposition

SVD-based dimensionality reduction techniques.

```python { .api }
class TruncatedSVD:
    """
    Truncated Singular Value Decomposition.

    Args:
        n_components: Number of components
        algorithm: SVD algorithm ('arpack', 'randomized')
        n_iter: Number of iterations for randomized solver
        random_state: Random seed
    """
    def __init__(self, n_components=2, algorithm='randomized', n_iter=5, random_state=None): ...

    def __call__(self, data):
        """Apply truncated SVD transformation."""

    @property
    def explained_variance_ratio_(self):
        """Percentage of variance explained by each component."""


class CUR:
    """
    CUR matrix decomposition.

    Args:
        rank: Target rank of decomposition
        max_error: Maximum reconstruction error
    """
    def __init__(self, rank=3, max_error=1): ...

    def __call__(self, data):
        """Apply CUR decomposition."""
```
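
A short sketch along the same lines. TruncatedSVD does not centre the data, which is why it is the usual choice for sparse matrices, while CUR builds its approximation from actual rows and columns of the data, which keeps the result interpretable:

```python
# Sketch, following the interfaces above. TruncatedSVD skips centring,
# so it also works on sparse matrices; CUR builds the approximation
# from actual rows/columns of the data.
from Orange.data import Table
from Orange.projection import TruncatedSVD, CUR

data = Table("iris")

svd = TruncatedSVD(n_components=2, algorithm='randomized', n_iter=5, random_state=0)
svd_data = svd(data)
print(svd_data.X.shape, svd.explained_variance_ratio_)

cur = CUR(rank=2, max_error=1)
cur_data = cur(data)
print(cur_data.X.shape)
```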

### Visualization Projections

Specialized projections for data visualization.

```python { .api }
class FreeViz:
    """
    FreeViz projection for visualization.
    """
    def __call__(self, data):
        """
        Apply FreeViz transformation.

        Args:
            data: Orange Table with class labels

        Returns:
            Table with 2D projection
        """


class RadViz:
    """
    RadViz (Radial Visualization) projection.
    """
    def __call__(self, data):
        """
        Apply RadViz transformation.

        Args:
            data: Orange Table with class labels

        Returns:
            Table with 2D projection
        """
```
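
Both projections expect a labelled data set and return a 2D layout. A minimal sketch, assuming both classes can be imported from `Orange.projection` like the other projectors in this document:

```python
# Sketch, assuming FreeViz and RadViz are importable from
# Orange.projection (import path assumed). FreeViz optimises anchor
# placement to separate classes; RadViz keeps anchors evenly spaced
# on a circle.
from Orange.data import Table
from Orange.projection import FreeViz, RadViz

data = Table("iris")                      # a discrete class is required

freeviz_data = FreeViz()(data)
radviz_data = RadViz()(data)

print(freeviz_data.X.shape)               # (150, 2)
print(radviz_data.X.shape)                # (150, 2)
```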

### Base Classes

Foundation classes for projection algorithms.

```python { .api }
class Projector:
    """Base class for all projection methods."""
    def __call__(self, data):
        """Apply projection to data."""

    def transform(self, data):
        """Transform data using fitted projector."""


class Projection:
    """Container for projection results."""
    def __init__(self, proj_data, projection): ...

    @property
    def data(self):
        """Transformed data."""

    @property
    def projection(self):
        """Projection object."""


class SklProjector(Projector):
    """Wrapper for scikit-learn projection methods."""
    def __init__(self, skl_proj): ...
```
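
Because every projector follows the same `__call__` protocol, projection methods can be swapped without touching downstream code. A minimal sketch under the assumptions of this document (each call returning a transformed Table):

```python
# Sketch: any projector is used through the same __call__ protocol,
# so different methods can be tried in a single loop. Assumes each
# call returns a transformed Table, as described in this document.
from Orange.data import Table
from Orange.projection import PCA, TruncatedSVD, FreeViz

data = Table("iris")
for projector in (PCA(n_components=2), TruncatedSVD(n_components=2), FreeViz()):
    projected = projector(data)
    print(type(projector).__name__, projected.X.shape)
```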

### Manifold Learning

Non-linear dimensionality reduction techniques.

```python { .api }
# Available through Orange.projection.manifold
class MDS:
    """Multidimensional Scaling."""
    def __init__(self, n_components=2, metric=True, dissimilarity='euclidean'): ...

    def __call__(self, data):
        """Apply MDS transformation."""


class Isomap:
    """Isomap embedding."""
    def __init__(self, n_components=2, n_neighbors=5): ...

    def __call__(self, data):
        """Apply Isomap transformation."""


class LocallyLinearEmbedding:
    """Locally Linear Embedding."""
    def __init__(self, n_components=2, n_neighbors=5, method='standard'): ...

    def __call__(self, data):
        """Apply LLE transformation."""


class TSNE:
    """t-distributed Stochastic Neighbor Embedding."""
    def __init__(self, n_components=2, perplexity=30.0, learning_rate=200.0): ...

    def __call__(self, data):
        """Apply t-SNE transformation."""
```
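
LocallyLinearEmbedding is the one manifold method not covered in the usage examples below. A short sketch showing how `n_neighbors` changes the embedding, using the `Orange.projection.manifold` path noted in the listing (the neighbour counts tried are arbitrary):

```python
# Sketch, using the Orange.projection.manifold path noted above.
# n_neighbors controls how local the reconstruction is; the values
# tried here are arbitrary.
from Orange.data import Table
from Orange.projection.manifold import LocallyLinearEmbedding

data = Table("iris")
for k in (5, 10, 20):
    lle = LocallyLinearEmbedding(n_components=2, n_neighbors=k, method='standard')
    lle_data = lle(data)
    print(f"LLE (n_neighbors={k}) shape: {lle_data.X.shape}")
```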

### Usage Examples

```python
# Basic projection workflow
from Orange.data import Table
from Orange.projection import PCA, LDA, FreeViz

# Load data
data = Table("iris")

# Principal Component Analysis
pca = PCA(n_components=2)
pca_data = pca(data)

print(f"Original data shape: {data.X.shape}")
print(f"PCA data shape: {pca_data.X.shape}")
print(f"Explained variance ratio: {pca.explained_variance_ratio_}")

# Linear Discriminant Analysis (requires class labels)
lda = LDA(n_components=2)
lda_data = lda(data)

print(f"LDA data shape: {lda_data.X.shape}")
print(f"LDA explained variance: {lda.explained_variance_ratio_}")

# Visualization projections
freeviz = FreeViz()
freeviz_data = freeviz(data)

print(f"FreeViz projection shape: {freeviz_data.X.shape}")

# Different PCA variants
from Orange.projection import SparsePCA, IncrementalPCA

sparse_pca = SparsePCA(n_components=2, alpha=0.1)
sparse_data = sparse_pca(data)

incremental_pca = IncrementalPCA(n_components=2)
incremental_data = incremental_pca(data)

# SVD-based methods
from Orange.projection import TruncatedSVD, CUR

svd = TruncatedSVD(n_components=2)
svd_data = svd(data)

cur = CUR(rank=2)
cur_data = cur(data)

# Manifold learning examples
from Orange.projection.manifold import MDS, TSNE, Isomap

# Multidimensional Scaling
mds = MDS(n_components=2, metric=True)
mds_data = mds(data)

# t-SNE (computationally intensive)
tsne = TSNE(n_components=2, perplexity=30, learning_rate=200)
tsne_data = tsne(data)

# Isomap
isomap = Isomap(n_components=2, n_neighbors=5)
isomap_data = isomap(data)

print(f"MDS projection shape: {mds_data.X.shape}")
print(f"t-SNE projection shape: {tsne_data.X.shape}")
print(f"Isomap projection shape: {isomap_data.X.shape}")

# Analyze projection quality
import numpy as np

# For PCA, check cumulative explained variance
cumulative_variance = np.cumsum(pca.explained_variance_ratio_)
print(f"Cumulative variance explained: {cumulative_variance}")

# Compare different numbers of components
for n_comp in [1, 2, 3, 4]:
    pca_n = PCA(n_components=n_comp)
    pca_n_data = pca_n(data)
    total_variance = np.sum(pca_n.explained_variance_ratio_)
    print(f"PCA with {n_comp} components explains {total_variance:.3f} of variance")
```
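
Building on the component-comparison loop above, a small sketch that picks the smallest number of components reaching a target share of explained variance (the 0.95 threshold is an arbitrary choice):

```python
# Sketch: choose the smallest number of PCA components whose cumulative
# explained variance reaches a target; 0.95 is an arbitrary threshold.
import numpy as np
from Orange.data import Table
from Orange.projection import PCA

data = Table("iris")
pca_full = PCA()                          # keep all components
pca_full(data)                            # fit on the data
cumulative = np.cumsum(pca_full.explained_variance_ratio_)
n_needed = int(np.searchsorted(cumulative, 0.95)) + 1
print(f"{n_needed} components explain {cumulative[n_needed - 1]:.3f} of the variance")
```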