Fast NumPy array functions written in C for high-performance numerical computing
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Utilities for array transformation, ranking, and data manipulation operations that maintain array structure while modifying values or order. These functions provide specialized operations for data preprocessing and analysis workflows.
In-place replacement of array values with optimized performance.
def replace(a, old, new):
"""
Replace values in array in-place.
Replaces all occurrences of 'old' value with 'new' value in array 'a'.
Supports NaN replacement and handles type casting for integer arrays.
Parameters:
- a: numpy.ndarray, input array to modify (modified in-place)
- old: scalar, value to replace (can be NaN for float arrays)
- new: scalar, replacement value
Returns:
None (array is modified in-place)
Raises:
TypeError: if 'a' is not a numpy array
ValueError: if type casting is not safe for integer arrays
"""

Assign ranks to array elements with support for ties and missing values.
def rankdata(a, axis=None):
"""
Assign ranks to data, dealing with ties appropriately.
Returns the ranks of the elements in the array. Ranks begin at 1.
Ties are resolved by averaging the ranks of tied elements.
Parameters:
- a: array_like, input array to rank
- axis: None or int, axis along which to rank (None for flattened array)
Returns:
ndarray, array of ranks (float64 dtype)
"""
def nanrankdata(a, axis=None):
"""
Assign ranks to data, ignoring NaN values.
Similar to rankdata but ignores NaN values in the ranking process.
NaN values in the output array correspond to NaN values in the input.
Parameters:
- a: array_like, input array to rank
- axis: None or int, axis along which to rank (None for flattened array)
Returns:
ndarray, array of ranks with NaN preserved (float64 dtype)
"""

Partial sorting operations for efficient selection of order statistics.
def partition(a, kth, axis=-1):
"""
Partial sort array along given axis.
Rearranges array elements such that the k-th element is in its final
sorted position. Elements smaller than k-th are before it, larger after.
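Bottleneck provides its own C implementation (falling back to numpy.partition for unsupported dtypes), so the order of elements within each side of the partition may differ from NumPy's.
Parameters:
- a: array_like, input array
- kth: int, non-negative index of the element that ends up in its sorted position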
- axis: int, axis along which to partition (default: -1)
Returns:
ndarray, partitioned array
"""
def argpartition(a, kth, axis=-1):
"""
Indices that would partition array along given axis.
Returns indices that would partition the array, similar to partition
but returning indices rather than the partitioned array.
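Bottleneck provides its own C implementation (falling back to numpy.argpartition for unsupported dtypes), so the order of indices within each side of the partition may differ from NumPy's.
Parameters:
- a: array_like, input array
- kth: int, non-negative index of the element that ends up in its sorted position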
- axis: int, axis along which to find partition indices (default: -1)
Returns:
ndarray, indices that would partition the array
"""

Propagate valid values forward to fill missing data gaps.
def push(a, n=None, axis=-1):
"""
Fill NaN values by pushing forward the last valid value.
Forward-fills NaN values with the most recent non-NaN value along the
specified axis. Optionally limits the number of consecutive fills.
Parameters:
- a: array_like, input array
- n: int or None, maximum number of consecutive NaN values to fill
(None for unlimited filling, default: None)
- axis: int, axis along which to push values (default: -1)
Returns:
ndarray, array with NaN values forward-filled
"""

import bottleneck as bn
import numpy as np
# Replace missing value indicators
data = np.array([1.0, -999.0, 3.0, -999.0, 5.0])
bn.replace(data, -999.0, np.nan) # In-place replacement
print("After replacement:", data) # [1.0, nan, 3.0, nan, 5.0]
# Replace NaN values with zero
data_with_nans = np.array([1.0, np.nan, 3.0, np.nan, 5.0])
bn.replace(data_with_nans, np.nan, 0.0)
print("NaNs replaced:", data_with_nans) # [1.0, 0.0, 3.0, 0.0, 5.0]
# Handle integer arrays (requires compatible types)
int_data = np.array([1, -1, 3, -1, 5])
bn.replace(int_data, -1, 0) # Replace -1 with 0
print("Integer replacement:", int_data) # [1, 0, 3, 0, 5]

import bottleneck as bn
import numpy as np
# Basic ranking
scores = np.array([85, 92, 78, 92, 88])
ranks = bn.rankdata(scores)
print("Scores:", scores) # [85, 92, 78, 92, 88]
print("Ranks:", ranks) # [2.0, 4.5, 1.0, 4.5, 3.0]
# Ranking with missing values
scores_with_nan = np.array([85, np.nan, 78, 92, 88])
nan_ranks = bn.nanrankdata(scores_with_nan)
print("Scores with NaN:", scores_with_nan)
print("NaN-aware ranks:", nan_ranks) # [2.0, nan, 1.0, 4.0, 3.0]
# Multi-dimensional ranking
matrix = np.array([[3, 1, 4],
[1, 5, 9],
[2, 6, 5]])
# Rank along rows (axis=1)
row_ranks = bn.rankdata(matrix, axis=1)
print("Row-wise ranks:")
print(row_ranks)
# Rank entire array (flattened)
flat_ranks = bn.rankdata(matrix, axis=None)
print("Flattened ranks:", flat_ranks)

import bottleneck as bn
import numpy as np
# Time series with missing values
timeseries = np.array([1.0, 2.0, np.nan, np.nan, 5.0, np.nan, 7.0])
# Unlimited forward fill
filled_unlimited = bn.push(timeseries.copy())
print("Original: ", timeseries)
print("Unlimited: ", filled_unlimited) # [1.0, 2.0, 2.0, 2.0, 5.0, 5.0, 7.0]
# Limited forward fill (max 1 consecutive fill)
filled_limited = bn.push(timeseries.copy(), n=1)
print("Limited(1):", filled_limited) # [1.0, 2.0, 2.0, nan, 5.0, 5.0, 7.0]
# Multi-dimensional forward fill
matrix_ts = np.array([[1.0, np.nan, 3.0],
[np.nan, 2.0, np.nan],
[4.0, np.nan, np.nan]])
# Fill along columns (axis=0)
filled_cols = bn.push(matrix_ts.copy(), axis=0)
print("Original matrix:")
print(matrix_ts)
print("Column-wise filled:")
print(filled_cols)
# Fill along rows (axis=1)
filled_rows = bn.push(matrix_ts.copy(), axis=1)
print("Row-wise filled:")
print(filled_rows)

import bottleneck as bn
import numpy as np
# Large array where we need to find top-k elements efficiently
large_array = np.random.randn(10000)
# Find the 10 largest elements using partition (much faster than full sort)
k = 10
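# Partition so the 10 largest values occupy the last k positions.
# bottleneck expects a non-negative kth, so use size - k rather than -k.
partitioned = bn.partition(large_array, large_array.size - k)
top_10 = partitioned[-k:] # Last 10 elements are the largest
# Get indices of top 10 elements
top_10_indices = bn.argpartition(large_array, large_array.size - k)[-k:]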
top_10_values = large_array[top_10_indices]
print("Top 10 values:", top_10_values)
print("Their indices:", top_10_indices)
# For finding the median efficiently (for an even-length array this picks the upper of the two middle elements)
n = len(large_array)
median_idx = n // 2
partitioned_for_median = bn.partition(large_array.copy(), median_idx)
median_value = partitioned_for_median[median_idx]
print(f"Median value: {median_value}")

import bottleneck as bn
import numpy as np
# Student scores across multiple subjects
students = ['Alice', 'Bob', 'Charlie', 'Diana', 'Eve']
math_scores = np.array([85, 92, 78, 96, 88])
science_scores = np.array([90, 85, 92, 88, 95])
# Convert scores to ranks (higher score = higher rank)
math_ranks = bn.rankdata(math_scores)
science_ranks = bn.rankdata(science_scores)
# Create comprehensive ranking
combined_scores = np.column_stack([math_scores, science_scores])
overall_ranks = bn.rankdata(combined_scores.mean(axis=1))
print("Student Rankings:")
for i, student in enumerate(students):
    print(f"{student}: Math={math_ranks[i]:.1f}, Science={science_ranks[i]:.1f}, Overall={overall_ranks[i]:.1f}")
# Handle tied rankings with percentile interpretation
percentiles = ((math_ranks - 1) / (len(math_ranks) - 1)) * 100
print("\nMath Score Percentiles:")
for i, student in enumerate(students):
    print(f"{student}: {percentiles[i]:.1f}th percentile")

Array manipulation functions provide significant performance benefits:
These functions are optimized for:
Install with Tessl CLI
npx tessl i tessl/pypi-bottleneck