A comprehensive machine learning library providing supervised and unsupervised learning algorithms with consistent APIs and extensive tools for data preprocessing, model evaluation, and deployment.
npx @tessl/cli install tessl/pypi-scikit-learn@1.7.0
scikit-learn is a comprehensive machine learning library for Python that provides simple and efficient tools for predictive data analysis. It features various classification, regression, and clustering algorithms, including support vector machines, random forests, gradient boosting, k-means, and DBSCAN, and is designed to interoperate with the Python numerical and scientific libraries NumPy and SciPy.
Name: scikit-learn
Language: Python
Installation: pip install scikit-learn
Version: 1.7.1
import sklearn
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.cluster import KMeans
from sklearn.metrics import accuracy_score, classification_report
Here's a simple example demonstrating scikit-learn's consistent API for machine learning:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
# Load dataset
iris = load_iris()
X, y = iris.data, iris.target
# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Train model
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)
# Make predictions
y_pred = clf.predict(X_test)
# Evaluate
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.3f}")
scikit-learn follows several key design principles:
All learning algorithms follow the same interface:
fit(X, y) - Learn from training data
predict(X) - Make predictions on new data
transform(X) - Transform data (for transformers)
Combine multiple processing steps:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
pipeline = Pipeline([
('scaler', StandardScaler()),
('classifier', SVC())
])
# Classification
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
# Regression
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
# Clustering
from sklearn.cluster import KMeans, DBSCAN, AgglomerativeClustering
from sklearn.mixture import GaussianMixture
# Dimensionality Reduction
from sklearn.decomposition import PCA, FastICA, NMF
from sklearn.manifold import TSNE, Isomap
# Scaling and Normalization
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
# Encoding
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, OrdinalEncoder
# Feature Engineering
from sklearn.preprocessing import PolynomialFeatures
from sklearn.feature_selection import SelectKBest, RFE
Data Preprocessing and Feature Engineering
# Cross-Validation
from sklearn.model_selection import cross_val_score, GridSearchCV, RandomizedSearchCV
from sklearn.model_selection import KFold, StratifiedKFold, train_test_split
# Metrics
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import mean_squared_error, r2_score, roc_auc_score
Model Selection and Evaluation
# Load toy datasets
from sklearn.datasets import load_iris, load_diabetes, load_wine, load_breast_cancer
# Generate synthetic data
from sklearn.datasets import make_classification, make_regression, make_blobs
# Fetch real-world datasets
from sklearn.datasets import fetch_20newsgroups, fetch_california_housing
# Classification metrics
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.metrics import ConfusionMatrixDisplay, RocCurveDisplay
# Regression metrics
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.metrics import PredictionErrorDisplay
# Text vectorization
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.feature_extraction.text import HashingVectorizer, TfidfTransformer
# Dictionary and hashing
from sklearn.feature_extraction import DictVectorizer, FeatureHasher
# Image processing
from sklearn.feature_extraction.image import img_to_graph, grid_to_graph
# Pipeline construction
from sklearn.pipeline import Pipeline, make_pipeline, FeatureUnion
# Column-wise transformations
from sklearn.compose import ColumnTransformer, make_column_transformer
from sklearn.compose import TransformedTargetRegressor
# Classification and regression
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.neighbors import RadiusNeighborsClassifier, RadiusNeighborsRegressor
# Outlier detection and density estimation
from sklearn.neighbors import LocalOutlierFactor, KernelDensity
from sklearn.neighbors import NearestNeighbors, NearestCentroid
# Core utilities
from sklearn.base import clone
from sklearn import get_config, set_config, config_context
# Version and system information
import sklearn
sklearn.__version__, sklearn.show_versions()
import sklearn
print(sklearn.__version__) # "1.7.1"
# Get system information
sklearn.show_versions()
scikit-learn provides everything needed for machine learning workflows, from data preprocessing to model evaluation, making it the go-to library for machine learning in Python.