Real-time person and body-part segmentation in web browsers, powered by TensorFlow.js machine-learning models.
—
Person segmentation capabilities for detecting and isolating human figures in images and videos. Provides both semantic segmentation (combined mask for all people) and instance segmentation (individual masks per person) with associated pose keypoints.
Segments all people in the image into a single combined mask with all detected poses.
/**
 * Segments all people in the input into a single combined (semantic) mask.
 *
 * All detected people share one binary mask; per-person separation is not
 * provided here — use segmentMultiPerson for instance-level masks.
 *
 * @param input - Image source (ImageData, HTMLImageElement, HTMLCanvasElement,
 *   HTMLVideoElement, OffscreenCanvas, or tf.Tensor3D — see BodyPixInput)
 * @param config - Optional inference configuration; presumably unspecified
 *   fields fall back to the defaults in PERSON_INFERENCE_CONFIG
 * @returns Promise resolving to the combined mask plus all detected poses
 */
segmentPerson(
input: BodyPixInput,
config?: PersonInferenceConfig
): Promise<SemanticPersonSegmentation>;
/** Result of segmentPerson: one mask shared by every detected person. */
interface SemanticPersonSegmentation {
/** Binary segmentation mask (0 = background, 1 = person) for all people combined; presumably one entry per pixel (width * height) — confirm against library docs */
data: Uint8Array;
/** Mask width in pixels */
width: number;
/** Mask height in pixels */
height: number;
/** Poses of every detected person (bounded by maxDetections in the config) */
allPoses: Pose[];
}
type BodyPixInput = ImageData | HTMLImageElement | HTMLCanvasElement | HTMLVideoElement | OffscreenCanvas | tf.Tensor3D;

Segments multiple people individually, providing separate masks and poses for each detected person.
/**
 * Segments each detected person individually (instance segmentation).
 *
 * Unlike segmentPerson, the result is one entry per person, each carrying
 * its own binary mask and its matched pose keypoints.
 *
 * @param input - Image source (see BodyPixInput)
 * @param config - Optional multi-person inference configuration; presumably
 *   unspecified fields fall back to MULTI_PERSON_INSTANCE_INFERENCE_CONFIG
 * @returns Promise resolving to an array of per-person segmentations
 */
segmentMultiPerson(
input: BodyPixInput,
config?: MultiPersonInstanceInferenceConfig
): Promise<PersonSegmentation[]>;
/** One entry of the segmentMultiPerson result: a single person's mask and pose. */
interface PersonSegmentation {
  /** Binary segmentation mask (0 = background, 1 = person) for this person only */
  data: Uint8Array;
  /** Mask width in pixels */
  width: number;
  /** Mask height in pixels */
  height: number;
  /** Pose keypoints matched to this person */
  pose: Pose;
}

/** Tunable inference options for segmentPerson; all fields are optional. */
interface PersonInferenceConfig {
  /** Flip result horizontally — useful for mirrored sources such as front-facing webcams */
  flipHorizontal?: boolean;
  /** Internal resolution — higher values = better accuracy, slower inference */
  internalResolution?: 'low' | 'medium' | 'high' | 'full' | number;
  /** Segmentation confidence threshold (0-1); lower-confidence pixels are treated as background */
  segmentationThreshold?: number;
  /** Maximum number of poses to detect */
  maxDetections?: number;
  /** Minimum pose confidence score (0-1) for a detection to be kept */
  scoreThreshold?: number;
  /** Non-maximum suppression radius for pose detection */
  nmsRadius?: number;
}
/** Inference options for segmentMultiPerson; extends the single-person options. */
interface MultiPersonInstanceInferenceConfig extends PersonInferenceConfig {
/** Minimum keypoint score (0-1) for pose matching */
minKeypointScore?: number;
/** Number of refinement steps — more steps = better accuracy, slower inference */
refineSteps?: number;
}
// Default configuration constants
// Presumably applied by segmentPerson for any field the caller omits — confirm against library docs.
const PERSON_INFERENCE_CONFIG: PersonInferenceConfig = {
flipHorizontal: false, // assume a non-mirrored source
internalResolution: 'medium', // balanced accuracy vs. speed
segmentationThreshold: 0.7, // per-pixel confidence cutoff
maxDetections: 10,
scoreThreshold: 0.4, // minimum pose confidence
nmsRadius: 20, // non-maximum suppression radius
};
// Defaults for segmentMultiPerson: the single-person defaults plus the
// instance-matching parameters (minKeypointScore, refineSteps).
const MULTI_PERSON_INSTANCE_INFERENCE_CONFIG: MultiPersonInstanceInferenceConfig = {
  flipHorizontal: false,
  internalResolution: 'medium',
  segmentationThreshold: 0.7,
  maxDetections: 10,
  scoreThreshold: 0.4,
  nmsRadius: 20,
  minKeypointScore: 0.3,
  refineSteps: 10,
};

Usage Examples:
import * as bodyPix from '@tensorflow-models/body-pix';

// Load the BodyPix model (weights are downloaded on first use)
const net = await bodyPix.load();
// getElementById returns HTMLElement | null; cast to the concrete element type
const imageElement = document.getElementById('people-image') as HTMLImageElement;

// Basic semantic segmentation with default settings
const segmentation = await net.segmentPerson(imageElement);
console.log(`Found ${segmentation.allPoses.length} people`);

// High-accuracy semantic segmentation (slower inference)
const highQualitySegmentation = await net.segmentPerson(imageElement, {
  internalResolution: 'high',
  segmentationThreshold: 0.8,
  scoreThreshold: 0.5
});

// Multi-person instance segmentation: one mask + pose per detected person
const peopleSegmentations = await net.segmentMultiPerson(imageElement, {
  maxDetections: 5,
  scoreThreshold: 0.4,
  segmentationThreshold: 0.7
});
console.log(`Detected ${peopleSegmentations.length} individual people`);
peopleSegmentations.forEach((person, index) => {
  console.log(`Person ${index}: pose score ${person.pose.score}`);
});

// Webcam segmentation with horizontal flip for a mirrored preview
const videoElement = document.getElementById('webcam') as HTMLVideoElement;
const webcamSegmentation = await net.segmentPerson(videoElement, {
  flipHorizontal: true,
  internalResolution: 'medium'
});

Real-time Applications (30+ FPS):
// Real-time preset: lowest latency, reduced accuracy
const config = {
  internalResolution: 'low',
  segmentationThreshold: 0.7,
  maxDetections: 3
};

Balanced Quality (15-30 FPS):

// Balanced preset: medium resolution, moderate person count
const config = {
  internalResolution: 'medium',
  segmentationThreshold: 0.7,
  maxDetections: 5
};

High Quality (5-15 FPS):

// High-quality preset: best accuracy, slowest inference
const config = {
  internalResolution: 'high',
  segmentationThreshold: 0.8,
  scoreThreshold: 0.5,
  maxDetections: 10
};

Each segmentation result includes pose keypoints providing additional context:
/** A single detected pose: 17 keypoints plus an overall confidence. */
interface Pose {
  /** Array of 17 body keypoints (nose, eyes, ears, shoulders, elbows, wrists, hips, knees, ankles) */
  keypoints: Keypoint[];
  /** Overall pose confidence score (0-1) */
  score: number;
}

/** One body landmark within a pose. */
interface Keypoint {
  /** Keypoint confidence score (0-1) */
  score: number;
  /** Pixel coordinates of the keypoint */
  position: Vector2D;
  /** Body part name (e.g., 'nose', 'leftShoulder', 'rightAnkle') */
  part: string;
}

/** 2D pixel coordinate. */
interface Vector2D {
  x: number;
  y: number;
}

Keypoint Names:
nose, leftEye, rightEye, leftEar, rightEar, leftShoulder, rightShoulder, leftElbow, rightElbow, leftWrist, rightWrist, leftHip, rightHip, leftKnee, rightKnee, leftAnkle, rightAnkle

Install with Tessl CLI
npx tessl i tessl/npm-tensorflow-models--body-pix