0
# NumPy Integration
1
2
pybind11 provides comprehensive integration with NumPy arrays, enabling high-performance data exchange between C++ and Python numerical code. This integration supports the buffer protocol, automatic type conversion, and vectorization of C++ functions.
3
4
## Capabilities
5
6
### NumPy Array Wrapper
7
8
Core classes for working with NumPy arrays from C++.
9
10
```cpp { .api }
11
class array : public buffer {
12
public:
13
// Constructors
14
array();
15
array(const buffer_info &info);
16
17
// Array properties
18
ssize_t ndim() const; // Number of dimensions
19
const ssize_t* shape() const; // Array shape
20
const ssize_t* strides() const; // Array strides
21
ssize_t size() const; // Total number of elements
22
ssize_t itemsize() const; // Size of each element
23
ssize_t nbytes() const; // Total number of bytes
24
25
// Data access
26
void* data() const; // Raw data pointer
27
template<typename T> T* data() const; // Typed data pointer
28
29
// Array metadata
30
pybind11::dtype dtype() const; // Array data type (NumPy dtype object)
31
bool owndata() const; // Whether array owns its data
32
bool writeable() const; // Whether array is writeable
33
34
// Buffer protocol
35
buffer_info request(bool writable = false) const;
36
};
37
38
template<typename T>
39
class array_t : public array {
40
public:
41
// Type-specific array wrapper
42
using value_type = T;
43
44
// Constructors
45
array_t();
46
array_t(size_t size);
47
array_t(const std::vector<size_t> &shape);
48
array_t(const std::vector<size_t> &shape, const std::vector<size_t> &strides);
49
array_t(const buffer_info &info);
50
51
// Typed data access
52
const T* data() const; // Read-only pointer; use mutable_data() for write access
53
T* mutable_data() const;
54
55
// Element access (for 1D arrays)
56
T& operator[](ssize_t index);
57
const T& operator[](ssize_t index) const;
58
59
// Multi-dimensional access
60
template<typename... Indices>
61
T& operator()(Indices... indices);
62
63
template<typename... Indices>
64
const T& operator()(Indices... indices) const;
65
66
// Iteration
67
T* begin() const;
68
T* end() const;
69
};
70
```
71
72
### Data Type Support
73
74
NumPy data type integration and conversion.
75
76
```cpp { .api }
77
class dtype : public object {
78
public:
79
// Get dtype for C++ type
80
template<typename T>
81
static dtype of();
82
83
// Dtype properties
84
ssize_t itemsize() const;
85
std::string format() const;
86
char kind() const;
87
88
// Type checking
89
bool is_equiv(const dtype &other) const;
90
};
91
92
// Supported automatic conversions:
93
// C++ type -> NumPy dtype
94
// bool -> bool
95
// int8_t -> int8
96
// uint8_t -> uint8
97
// int16_t -> int16
98
// uint16_t -> uint16
99
// int32_t -> int32
100
// uint32_t -> uint32
101
// int64_t -> int64
102
// uint64_t -> uint64
103
// float -> float32
104
// double -> float64
105
// std::complex<float> -> complex64
106
// std::complex<double> -> complex128
107
```
108
109
### Function Vectorization
110
111
Automatically vectorize C++ functions to work with NumPy arrays.
112
113
```cpp { .api }
114
// Vectorize a function to work element-wise on arrays
115
template<typename Func>
116
auto vectorize(Func &&f);
117
118
// Vectorize with custom function object
119
template<typename Return, typename... Args>
120
class vectorized_function {
121
public:
122
vectorized_function(std::function<Return(Args...)> f);
123
124
// Call operator for vectorized execution
125
array_t<Return> operator()(const array_t<Args>&... arrays);
126
};
127
```
128
129
### Buffer Protocol Integration
130
131
Direct integration with Python's buffer protocol for efficient data sharing.
132
133
```cpp { .api }
134
class buffer_info {
135
public:
136
void *ptr; // Pointer to buffer data
137
ssize_t itemsize; // Size of individual items in bytes
138
std::string format; // Buffer format string (struct module style)
139
ssize_t ndim; // Number of dimensions
140
std::vector<ssize_t> shape; // Shape of buffer
141
std::vector<ssize_t> strides; // Strides for each dimension
142
bool readonly; // Whether buffer is read-only
143
144
buffer_info(void *ptr, ssize_t itemsize, const std::string &format,
145
ssize_t ndim, std::vector<ssize_t> shape,
146
std::vector<ssize_t> strides, bool readonly = false);
147
};
148
149
// Enable buffer protocol for custom classes
150
class MyClass {
151
public:
152
buffer_info get_buffer_info(); // Implement this method
153
};
154
155
// In binding code:
156
py::class_<MyClass>(m, "MyClass", py::buffer_protocol())
157
.def_buffer(&MyClass::get_buffer_info);
158
```
159
160
### Memory Management
161
162
Control memory allocation and lifetime for NumPy arrays.
163
164
```cpp { .api }
165
// Memory management flags for array creation
166
enum class array_c_style { f = detail::npy_api::NPY_ARRAY_C_CONTIGUOUS_ };
167
enum class array_f_style { f = detail::npy_api::NPY_ARRAY_F_CONTIGUOUS_ };
168
enum class array_forcecast { f = detail::npy_api::NPY_ARRAY_FORCECAST_ };
169
170
// Create array with specific memory layout
171
template<typename T>
172
array_t<T> make_array(const std::vector<size_t> &shape,
173
const T* data = nullptr,
174
handle base = handle());
175
```
176
177
### Eigen Integration
178
179
Integration with the Eigen linear algebra library (requires `#include <pybind11/eigen.h>`).
180
181
```cpp { .api }
182
// Automatic conversion between Eigen matrices and NumPy arrays
183
// Eigen::Matrix<T, Rows, Cols> <-> numpy.ndarray
184
// Eigen::VectorXd <-> numpy.ndarray (1D)
185
// Eigen::MatrixXd <-> numpy.ndarray (2D)
186
187
// Eigen types are automatically supported in function signatures
188
Eigen::MatrixXd process_matrix(const Eigen::MatrixXd& input);
189
Eigen::VectorXd solve_system(const Eigen::MatrixXd& A, const Eigen::VectorXd& b);
190
```
191
192
## Usage Examples
193
194
### Basic Array Operations
195
196
```cpp
197
#include <pybind11/pybind11.h>
198
#include <pybind11/numpy.h>
199
200
namespace py = pybind11;
201
202
// Function that works with NumPy arrays
203
py::array_t<double> square_array(py::array_t<double, py::array::c_style | py::array::forcecast> input) {
204
// Get buffer info
205
py::buffer_info buf = input.request();
206
207
// Check that we have a 1-D array
208
if (buf.ndim != 1)
209
throw std::runtime_error("Input array must be 1-dimensional");
210
211
// Create output array
212
auto result = py::array_t<double>(buf.size);
213
py::buffer_info res_buf = result.request();
214
215
// Perform computation
216
double *input_ptr = static_cast<double*>(buf.ptr);
217
double *output_ptr = static_cast<double*>(res_buf.ptr);
218
219
for (py::ssize_t i = 0; i < buf.shape[0]; i++) {
220
output_ptr[i] = input_ptr[i] * input_ptr[i];
221
}
222
223
return result;
224
}
225
226
PYBIND11_MODULE(example, m) {
227
m.def("square_array", &square_array, "Square all elements in array");
228
}
229
```
230
231
### Vectorization Example
232
233
```cpp
234
#include <pybind11/pybind11.h>
235
#include <pybind11/numpy.h>
236
237
namespace py = pybind11;
238
239
// Simple scalar function
240
double compute_value(double x, double y) {
241
return x * x + y * y;
242
}
243
244
PYBIND11_MODULE(example, m) {
245
// Vectorize the function automatically
246
m.def("compute_vectorized", py::vectorize(compute_value),
247
"Vectorized computation of x^2 + y^2");
248
}
249
250
// Python usage:
251
// import numpy as np
252
// x = np.array([1, 2, 3])
253
// y = np.array([4, 5, 6])
254
// result = compute_vectorized(x, y) # Returns array([17., 29., 45.]) (dtype float64)
255
```
256
257
### Multi-dimensional Array Processing
258
259
```cpp
260
#include <pybind11/pybind11.h>
261
#include <pybind11/numpy.h>
262
263
namespace py = pybind11;
264
265
py::array_t<double> process_2d_array(py::array_t<double, py::array::c_style | py::array::forcecast> input) {
266
py::buffer_info buf = input.request();
267
268
if (buf.ndim != 2)
269
throw std::runtime_error("Input array must be 2-dimensional");
270
271
int rows = buf.shape[0];
272
int cols = buf.shape[1];
273
274
// Create output array with same shape
275
auto result = py::array_t<double>({rows, cols});
276
py::buffer_info res_buf = result.request();
277
278
double *input_ptr = static_cast<double*>(buf.ptr);
279
double *output_ptr = static_cast<double*>(res_buf.ptr);
280
281
// Process each interior element (example: 3x3 smoothing filter); border elements of the result are not written by this loop
282
for (int i = 1; i < rows - 1; i++) {
283
for (int j = 1; j < cols - 1; j++) {
284
double sum = 0.0;
285
for (int di = -1; di <= 1; di++) {
286
for (int dj = -1; dj <= 1; dj++) {
287
sum += input_ptr[(i + di) * cols + (j + dj)];
288
}
289
}
290
output_ptr[i * cols + j] = sum / 9.0;
291
}
292
}
293
294
return result;
295
}
296
297
PYBIND11_MODULE(example, m) {
298
m.def("process_2d_array", &process_2d_array);
299
}
300
```
301
302
### Custom Class with Buffer Protocol
303
304
```cpp
305
#include <pybind11/pybind11.h>
306
#include <pybind11/numpy.h>
307
#include <vector>
308
309
namespace py = pybind11;
310
311
class DataContainer {
312
std::vector<double> data_;
313
std::vector<size_t> shape_;
314
315
public:
316
DataContainer(const std::vector<size_t>& shape) : shape_(shape) {
317
size_t size = 1;
318
for (auto dim : shape) size *= dim;
319
data_.resize(size, 0.0);
320
}
321
322
// Enable buffer protocol
323
py::buffer_info get_buffer_info() {
324
return py::buffer_info(
325
data_.data(), // Pointer to data
326
sizeof(double), // Size of one scalar
327
py::format_descriptor<double>::format(), // Python struct-style format
328
shape_.size(), // Number of dimensions
329
shape_, // Buffer dimensions
330
calculate_strides(shape_) // Strides for each index
331
);
332
}
333
334
private:
335
std::vector<size_t> calculate_strides(const std::vector<size_t>& shape) {
336
std::vector<size_t> strides(shape.size());
337
size_t stride = sizeof(double);
338
for (int i = shape.size() - 1; i >= 0; i--) {
339
strides[i] = stride;
340
stride *= shape[i];
341
}
342
return strides;
343
}
344
};
345
346
PYBIND11_MODULE(example, m) {
347
py::class_<DataContainer>(m, "DataContainer", py::buffer_protocol())
348
.def(py::init<const std::vector<size_t>&>())
349
.def_buffer(&DataContainer::get_buffer_info);
350
}
351
352
// Python usage:
353
// container = DataContainer([10, 20])
354
// array = np.array(container, copy=False) # Direct access to C++ data
355
```
356
357
### Integration with Eigen
358
359
```cpp
360
#include <pybind11/pybind11.h>
361
#include <pybind11/eigen.h>
362
#include <Eigen/Dense>
363
364
namespace py = pybind11;
365
366
// Automatic conversion between Eigen and NumPy
367
Eigen::MatrixXd matrix_multiply(const Eigen::MatrixXd& A, const Eigen::MatrixXd& B) {
368
return A * B;
369
}
370
371
Eigen::VectorXd solve_linear_system(const Eigen::MatrixXd& A, const Eigen::VectorXd& b) {
372
return A.colPivHouseholderQr().solve(b);
373
}
374
375
PYBIND11_MODULE(example, m) {
376
m.def("matrix_multiply", &matrix_multiply);
377
m.def("solve_linear_system", &solve_linear_system);
378
}
379
380
// Python usage:
381
// import numpy as np
382
// A = np.random.random((5, 5))
383
// B = np.random.random((5, 3))
384
// C = matrix_multiply(A, B) # Automatic conversion
385
```
386
387
## Performance Considerations
388
389
### Memory Layout and Copying
390
391
```cpp
392
// Avoid unnecessary copying
393
py::array_t<double> process_inplace(py::array_t<double> array) {
394
// Request mutable buffer to modify in-place
395
py::buffer_info buf = array.request(/* writable = */ true);
396
397
double* ptr = static_cast<double*>(buf.ptr);
398
// Modify data in-place...
399
400
return array; // Return modified array
401
}
402
403
// Control memory layout
404
auto create_c_contiguous() {
405
return py::array_t<double>(
406
{100, 100}, // shape
407
{100 * sizeof(double), sizeof(double)} // C-style strides
408
);
409
}
410
411
auto create_f_contiguous() {
412
return py::array_t<double>(
413
{100, 100}, // shape
414
{sizeof(double), 100 * sizeof(double)} // Fortran-style strides
415
);
416
}
417
```
418
419
## Types
420
421
```cpp { .api }
422
namespace pybind11 {
423
// Core NumPy types
424
class array;
425
template<typename T> class array_t;
426
class dtype;
427
class buffer_info;
428
429
// Vectorization
430
template<typename Func> auto vectorize(Func &&f);
431
template<typename Return, typename... Args> class vectorized_function;
432
433
// Memory layout flags
434
enum class array_c_style;
435
enum class array_f_style;
436
enum class array_forcecast;
437
438
// Format descriptors for buffer protocol
439
template<typename T> struct format_descriptor;
440
441
// Buffer protocol utilities
442
template<typename T>
443
buffer_info get_buffer_info(T *ptr, ssize_t size);
444
}
445
```