0
# Vector Operations
1
2
The vector API provides dense and sparse representations behind a single, unified set of operations. Vectors are the basic building block for feature representations and numerical computations in machine learning applications.
3
4
## Capabilities
5
6
### Vector Creation
7
8
Create vectors using factory methods from the `Vectors` object.
9
10
```scala { .api }
11
/**
12
* Factory methods for creating Vector instances
13
*/
14
object Vectors {
15
/** Creates a dense vector from individual values */
16
def dense(firstValue: Double, otherValues: Double*): Vector
17
18
/** Creates a dense vector from an array */
19
def dense(values: Array[Double]): Vector
20
21
/** Creates a sparse vector from indices and values arrays */
22
def sparse(size: Int, indices: Array[Int], values: Array[Double]): Vector
23
24
/** Creates a sparse vector from a sequence of (index, value) pairs, which may be given in any order */
25
def sparse(size: Int, elements: Seq[(Int, Double)]): Vector
26
27
/** Creates a sparse vector from a Java iterable of (index, value) pairs (Java-friendly overload) */
28
def sparse(size: Int, elements: java.lang.Iterable[(java.lang.Integer, java.lang.Double)]): Vector
29
30
/** Creates a zero vector of specified size */
31
def zeros(size: Int): Vector
32
}
33
```
34
35
**Usage Examples:**
36
37
```scala
38
import org.apache.spark.ml.linalg.Vectors
39
40
// Dense vectors
41
val dense1 = Vectors.dense(1.0, 2.0, 3.0)
42
val dense2 = Vectors.dense(Array(1.0, 2.0, 3.0, 4.0))
43
val zeros = Vectors.zeros(5)
44
45
// Sparse vectors
46
val sparse1 = Vectors.sparse(5, Array(0, 2, 4), Array(1.0, 3.0, 5.0))
47
val sparse2 = Vectors.sparse(4, Seq((0, 1.0), (3, 4.0)))
48
```
49
50
### Vector Operations
51
52
Common operations available on all vector types.
53
54
```scala { .api }
55
/**
56
* Base Vector trait with common operations
57
*/
58
trait Vector extends Serializable {
59
/** Size of the vector */
60
def size: Int
61
62
/** Converts the vector to a double array */
63
def toArray: Array[Double]
64
65
/** Gets the value at index i */
66
def apply(i: Int): Double
67
68
/** Creates a deep copy of the vector */
69
def copy: Vector
70
71
/** Applies a function to every active (explicitly stored) element; for a dense vector that is every element, zeros included */
72
def foreachActive(f: (Int, Double) => Unit): Unit
73
74
/** Number of active (explicitly stored) entries */
75
def numActives: Int
76
77
/** Number of non-zero elements */
78
def numNonzeros: Int
79
80
/** Converts to sparse representation */
81
def toSparse: SparseVector
82
83
/** Converts to dense representation */
84
def toDense: DenseVector
85
86
/** Returns the vector in dense or sparse format, whichever uses less storage */
87
def compressed: Vector
88
89
/** Returns the index of a maximal element (the first one in case of a tie), or -1 if the vector is empty */
90
def argmax: Int
91
}
92
```
93
94
**Usage Examples:**
95
96
```scala
97
import org.apache.spark.ml.linalg.Vectors
98
99
val vector = Vectors.dense(1.0, 0.0, 3.0, 0.0, 5.0)
100
101
// Basic operations
102
println(s"Size: ${vector.size}") // Size: 5
103
println(s"Element at index 2: ${vector(2)}") // Element at index 2: 3.0
104
println(s"Non-zeros: ${vector.numNonzeros}") // Non-zeros: 3
105
println(s"Max index: ${vector.argmax}") // Max index: 4
106
107
// Conversions
108
val sparse = vector.toSparse
109
val dense = sparse.toDense
110
val compressed = vector.compressed
111
112
// Iterate over active elements (all five for this dense vector) and print only the non-zero ones
113
vector.foreachActive { (index, value) =>
114
if (value != 0.0) println(s"[$index] = $value")
115
}
116
```
117
118
### Dense Vectors
119
120
Dense vector implementation storing all elements in a contiguous array.
121
122
```scala { .api }
123
/**
124
* Dense vector represented by a value array
125
*/
126
class DenseVector(val values: Array[Double]) extends Vector {
127
/** The underlying array of values */
128
val values: Array[Double]
129
}
130
131
object DenseVector {
132
/** Extracts values array from dense vector for pattern matching */
133
def unapply(dv: DenseVector): Option[Array[Double]]
134
}
135
```
136
137
**Usage Examples:**
138
139
```scala
140
import org.apache.spark.ml.linalg.{DenseVector, Vectors}
141
142
// Create dense vector
143
val dense = new DenseVector(Array(1.0, 2.0, 3.0))
144
val dense2 = Vectors.dense(1.0, 2.0, 3.0).asInstanceOf[DenseVector]
145
146
// Access underlying array
147
val array = dense.values
148
println(array.mkString("[", ", ", "]")) // [1.0, 2.0, 3.0]
149
150
// Pattern matching
151
dense match {
152
case DenseVector(values) => println(s"Dense with ${values.length} elements")
153
case _ => println("Not a dense vector")
154
}
155
```
156
157
### Sparse Vectors
158
159
Sparse vector implementation that stores only its active entries (normally the non-zero elements) in two parallel arrays: one of indices and one of values.
160
161
```scala { .api }
162
/**
163
* Sparse vector represented by index and value arrays
164
*/
165
class SparseVector(
166
override val size: Int,
167
val indices: Array[Int],
168
val values: Array[Double]
169
) extends Vector {
170
/** Size of the vector */
171
override val size: Int
172
173
/** Indices of the active (explicitly stored) entries, in strictly increasing order */
174
val indices: Array[Int]
175
176
/** Values of the active entries; values(k) is the element at position indices(k) */
177
val values: Array[Double]
178
}
179
180
object SparseVector {
181
/** Extracts size, indices, and values for pattern matching */
182
def unapply(sv: SparseVector): Option[(Int, Array[Int], Array[Double])]
183
}
184
```
185
186
**Usage Examples:**
187
188
```scala
189
import org.apache.spark.ml.linalg.{SparseVector, Vectors}
190
191
// Create sparse vector: [1.0, 0.0, 3.0, 0.0, 5.0]
192
val sparse = new SparseVector(5, Array(0, 2, 4), Array(1.0, 3.0, 5.0))
193
val sparse2 = Vectors.sparse(5, Array(0, 2, 4), Array(1.0, 3.0, 5.0)).asInstanceOf[SparseVector]
194
195
// Access components
196
println(s"Size: ${sparse.size}") // Size: 5
197
println(s"Indices: ${sparse.indices.mkString(", ")}") // Indices: 0, 2, 4
198
println(s"Values: ${sparse.values.mkString(", ")}") // Values: 1.0, 3.0, 5.0
199
200
// Pattern matching
201
sparse match {
202
case SparseVector(size, indices, values) =>
203
println(s"Sparse vector of size $size with ${indices.length} non-zero elements")
204
case _ => println("Not a sparse vector")
205
}
206
207
// Convert to dense array
208
val array = sparse.toArray
209
println(array.mkString("[", ", ", "]")) // [1.0, 0.0, 3.0, 0.0, 5.0]
210
```
211
212
### Vector Utility Functions
213
214
Utility functions for vector computations.
215
216
```scala { .api }
217
object Vectors {
218
/** Computes the p-norm of a vector; p must be >= 1 (use Double.PositiveInfinity for the max norm) */
219
def norm(vector: Vector, p: Double): Double
220
221
/** Computes the squared Euclidean distance between two vectors of the same size */
222
def sqdist(v1: Vector, v2: Vector): Double
223
}
224
```
225
226
**Usage Examples:**
227
228
```scala
229
import org.apache.spark.ml.linalg.Vectors
230
231
val v1 = Vectors.dense(1.0, 2.0, 3.0)
232
val v2 = Vectors.dense(4.0, 5.0, 6.0)
233
234
// Compute norms
235
val l1Norm = Vectors.norm(v1, 1.0) // L1 norm: 6.0
236
val l2Norm = Vectors.norm(v1, 2.0) // L2 norm: √14 ≈ 3.74
237
val infNorm = Vectors.norm(v1, Double.PositiveInfinity) // L∞ norm: 3.0
238
239
// Compute squared distance
240
val sqDist = Vectors.sqdist(v1, v2) // (4-1)² + (5-2)² + (6-3)² = 27.0
241
```
242
243
## Types
244
245
```scala { .api }
246
// Core vector types
247
sealed trait Vector extends Serializable
248
class DenseVector(val values: Array[Double]) extends Vector
249
class SparseVector(override val size: Int, val indices: Array[Int], val values: Array[Double]) extends Vector
250
```