CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/maven-org-bytedeco--javacpp-presets-platform

Cross-platform Java bindings for 60+ native C/C++ libraries including OpenCV, FFmpeg, PyTorch, TensorFlow, and scientific computing libraries

Pending
Overview
Eval results
Files

docs/multimedia.md

Multimedia Processing

FFmpeg bindings providing comprehensive video and audio processing capabilities including encoding, decoding, format conversion, streaming, and filtering operations.

Capabilities

Format Context Operations

Handle multimedia file formats and container operations.

/**
 * Format context for input/output operations.
 *
 * <p>Groups the container-level operations described in this section:
 * allocating a context, opening an input, probing its streams, closing it,
 * and reading the stream count and stream list.
 *
 * <p>NOTE(review): the usage examples in this document invoke
 * {@code avformat_open_input}, {@code avformat_find_stream_info} and
 * {@code avformat_close_input} as statically imported functions that take the
 * context as their first argument, and they also read {@code iformat()} and
 * {@code duration()}, which are not declared here. Treat the signatures below
 * as a summary and confirm them against the bindings.
 */
public class AVFormatContext extends Pointer {
    /**
     * Allocate format context
     * @return New format context
     */
    public static native AVFormatContext avformat_alloc_context();
    
    /**
     * Open input file or stream
     * @param ps Pointer to format context (will be allocated if null)
     * @param url Input file path or URL
     * @param fmt Input format (null for auto-detection)
     * @param options Format options; the usage examples pass null
     * @return 0 on success, negative on error
     */
    public native int avformat_open_input(AVFormatContext ps, String url, 
        AVInputFormat fmt, AVDictionary options);
    
    /**
     * Read stream information
     * @param options Codec options; the usage examples pass
     *                {@code (AVDictionary) null}
     * @return 0 on success, negative on error
     */
    public native int avformat_find_stream_info(AVDictionary options);
    
    /**
     * Close input format context
     */
    public native void avformat_close_input();
    
    /**
     * Get number of streams
     * @return Number of streams in format context
     */
    public native int nb_streams();
    
    /**
     * Get streams array
     *
     * <p>NOTE(review): the return type is declared as {@code AVStreamVector}
     * here; the bindings may instead expose an indexed {@code streams(int)}
     * accessor — confirm before relying on this signature.
     *
     * @return Array of streams
     */
    public native AVStreamVector streams();
}

/**
 * Stream structure.
 *
 * <p>Describes one elementary stream of a container: its position in the
 * container, its codec parameters, and its timing information.
 */
public class AVStream extends Pointer {
    /** Stream index; the usage examples compare it against {@code AVPacket.stream_index()}. */
    public native int index();
    
    /**
     * Stream codec parameters. The usage examples read {@code codec_type()},
     * {@code codec_id()}, {@code width()}, {@code height()},
     * {@code sample_rate()} and {@code channels()} from the returned object.
     */
    public native AVCodecParameters codecpar();
    
    /** Stream time base */
    public native AVRational time_base();
    
    /** Stream duration (presumably expressed in {@code time_base} units — TODO confirm) */
    public native long duration();
}

/**
 * Input format structure.
 *
 * <p>Carries the descriptive names of a container format; it is also the type
 * of the {@code fmt} argument of {@code avformat_open_input}, where null
 * requests auto-detection.
 *
 * <p>NOTE(review): the accessors below are declared as returning
 * {@code String}, but the "Video File Information" example calls
 * {@code long_name().getString()}, which implies a pointer-like return type.
 * Confirm the actual return type against the bindings.
 */
public class AVInputFormat extends Pointer {
    /** Format name */
    public native String name();
    
    /** Format long name */
    public native String long_name();
    
    /** Format extensions */
    public native String extensions();
}

Codec Operations

Handle audio and video encoding/decoding operations.

/**
 * Codec context for encoding/decoding.
 *
 * <p>Covers the context lifecycle (allocate, open, close), the packet/frame
 * exchange with a decoder or encoder, and the basic video and audio
 * properties of the configured codec.
 *
 * <p>NOTE(review): the usage examples in this document call
 * {@code avcodec_open2}, {@code avcodec_send_packet} and
 * {@code avcodec_receive_frame} as statically imported functions that take
 * the context as their first argument, and they read
 * {@code channel_layout()}, which is not declared here. Confirm the exact
 * signatures against the bindings.
 */
public class AVCodecContext extends Pointer {
    /**
     * Allocate codec context
     * @param codec Codec to use
     * @return New codec context
     */
    public static native AVCodecContext avcodec_alloc_context3(AVCodec codec);
    
    /**
     * Open codec
     * @param codec Codec to open
     * @param options Codec options; the usage examples pass
     *                {@code (AVDictionary) null}
     * @return 0 on success, negative on error
     */
    public native int avcodec_open2(AVCodec codec, AVDictionary options);
    
    /**
     * Close codec context
     * @return 0 on success
     */
    public native int avcodec_close();
    
    /**
     * Send packet to decoder
     * @param pkt Input packet (null for flushing)
     * @return 0 on success, AVERROR(EAGAIN) if need to read output first
     */
    public native int avcodec_send_packet(AVPacket pkt);
    
    /**
     * Receive frame from decoder
     * @param frame Output frame
     * @return 0 on success, AVERROR(EAGAIN) if need to send input first
     */
    public native int avcodec_receive_frame(AVFrame frame);
    
    /**
     * Send frame to encoder
     * @param frame Input frame (null for flushing)
     * @return 0 on success, AVERROR(EAGAIN) if need to read output first
     */
    public native int avcodec_send_frame(AVFrame frame);
    
    /**
     * Receive packet from encoder
     * @param pkt Output packet
     * @return 0 on success, AVERROR(EAGAIN) if need to send input first
     */
    public native int avcodec_receive_packet(AVPacket pkt);
    
    /** Codec width (video) */
    public native int width();
    
    /** Codec height (video) */
    public native int height();
    
    /** Pixel format (video), as an int; the examples pass it to {@code sws_getContext} as the source format */
    public native int pix_fmt();
    
    /** Sample rate (audio) */
    public native int sample_rate();
    
    /** Number of channels (audio) */
    public native int channels();
    
    /** Sample format (audio), as an int; the examples pass it to {@code av_opt_set_sample_fmt} */
    public native int sample_fmt();
}

/**
 * Codec structure.
 *
 * <p>Provides static lookups for decoders and encoders, by numeric codec ID
 * or by name, plus descriptive accessors for the codec that was found. Every
 * lookup is documented as returning null when nothing matches, so callers
 * should check the result before using it.
 */
public class AVCodec extends Pointer {
    /**
     * Find decoder by codec ID
     * @param id Codec ID (the usage examples pass {@code codecpar().codec_id()})
     * @return Decoder codec or null
     */
    public static native AVCodec avcodec_find_decoder(int id);
    
    /**
     * Find encoder by codec ID
     * @param id Codec ID
     * @return Encoder codec or null
     */
    public static native AVCodec avcodec_find_encoder(int id);
    
    /**
     * Find decoder by name
     * @param name Codec name
     * @return Decoder codec or null
     */
    public static native AVCodec avcodec_find_decoder_by_name(String name);
    
    /**
     * Find encoder by name
     * @param name Codec name
     * @return Encoder codec or null
     */
    public static native AVCodec avcodec_find_encoder_by_name(String name);
    
    /** Codec name */
    public native String name();
    
    /** Codec long name */
    public native String long_name();
    
    /** Codec type (video/audio), as an int */
    public native int type();
    
    /** Codec ID, as an int */
    public native int id();
}

Frame and Packet Handling

Manage multimedia data containers for processing.

/**
 * Frame structure for decoded audio/video data.
 *
 * <p>Covers frame allocation and release, reference management, and access to
 * the video and audio properties, the data planes and the timestamps of a
 * frame.
 *
 * <p>NOTE(review): {@code av_frame_unref} and {@code av_frame_clone} are
 * declared as instance methods here while {@code av_frame_free} is static
 * with an explicit argument — confirm which calling form the bindings expose.
 */
public class AVFrame extends Pointer {
    /**
     * Allocate frame
     * @return New frame
     */
    public static native AVFrame av_frame_alloc();
    
    /**
     * Free frame
     * @param frame Frame to free
     */
    public static native void av_frame_free(AVFrame frame);
    
    /**
     * Unreference frame data
     */
    public native void av_frame_unref();
    
    /**
     * Clone frame
     * @return Cloned frame
     */
    public native AVFrame av_frame_clone();
    
    /** Frame width (video) */
    public native int width();
    
    /** Frame height (video) */
    public native int height();
    
    /** Pixel format (video); presumably also the sample format for audio frames — TODO confirm */
    public native int format();
    
    /** Sample rate (audio) */
    public native int sample_rate();
    
    /** Number of samples (audio); the audio example passes it to {@code swr_convert} as the input count */
    public native int nb_samples();
    
    /** Channel layout (audio) */
    public native long channel_layout();
    
    /** Frame data planes; the examples pass them to {@code sws_scale} and {@code swr_convert} */
    public native PointerPointer data();
    
    /** Line sizes for each plane; the video example passes them to {@code sws_scale} as strides */
    public native IntPointer linesize();
    
    /** Presentation timestamp */
    public native long pts();
    
    /** Decoding timestamp */
    public native long dts();
}

/**
 * Packet structure for encoded data.
 *
 * <p>Covers packet allocation and release, reference management, and access
 * to the payload, the owning stream index and the timing fields.
 *
 * <p>NOTE(review): the usage examples call {@code av_packet_unref(packet)} as
 * a statically imported function with the packet as its argument, whereas it
 * is declared as an instance method here — confirm against the bindings.
 */
public class AVPacket extends Pointer {
    /**
     * Allocate packet
     * @return New packet
     */
    public static native AVPacket av_packet_alloc();
    
    /**
     * Free packet
     * @param pkt Packet to free
     */
    public static native void av_packet_free(AVPacket pkt);
    
    /**
     * Unreference packet data
     */
    public native void av_packet_unref();
    
    /**
     * Clone packet
     * @return Cloned packet
     */
    public native AVPacket av_packet_clone();
    
    /** Packet data */
    public native BytePointer data();
    
    /** Packet size */
    public native int size();
    
    /** Stream index; the examples compare it with the selected stream to filter packets */
    public native int stream_index();
    
    /** Presentation timestamp */
    public native long pts();
    
    /** Decoding timestamp */
    public native long dts();
    
    /** Duration */
    public native long duration();
}

Scaling and Format Conversion

Convert between different pixel formats and scale video frames.

/**
 * Scaling context for video format conversion.
 *
 * <p>A context is created for a fixed source and destination geometry and
 * pixel format, used to convert image slices, and then released.
 *
 * <p>NOTE(review): the "Video Frame Extraction" example calls
 * {@code sws_scale(swsCtx, ...)} and {@code sws_freeContext(swsCtx)} with the
 * context as an explicit first argument, whereas they are declared as
 * instance methods here — confirm against the bindings.
 */
public class SwsContext extends Pointer {
    /**
     * Get scaling context
     * @param srcW Source width
     * @param srcH Source height
     * @param srcFormat Source pixel format
     * @param dstW Destination width
     * @param dstH Destination height
     * @param dstFormat Destination pixel format
     * @param flags Scaling algorithm flags (the example passes {@code SWS_BILINEAR})
     * @param srcFilter Source filter (the example passes null)
     * @param dstFilter Destination filter (the example passes null)
     * @param param Extra parameters (the example passes {@code (DoublePointer) null})
     * @return Scaling context
     */
    public static native SwsContext sws_getContext(int srcW, int srcH, int srcFormat,
        int dstW, int dstH, int dstFormat, int flags, SwsFilter srcFilter,
        SwsFilter dstFilter, DoublePointer param);
    
    /**
     * Scale frame
     * @param srcSlice Source image planes
     * @param srcStride Source line sizes
     * @param srcSliceY Source slice Y position (the example passes 0)
     * @param srcSliceH Source slice height (the example passes the full frame height)
     * @param dst Destination image planes
     * @param dstStride Destination line sizes
     * @return Height of output slice
     */
    public native int sws_scale(PointerPointer srcSlice, IntPointer srcStride,
        int srcSliceY, int srcSliceH, PointerPointer dst, IntPointer dstStride);
    
    /**
     * Free scaling context
     */
    public native void sws_freeContext();
}

/**
 * Audio resampling context.
 *
 * <p>A context is allocated, configured, initialized, used to convert blocks
 * of samples, and then released. The "Audio Processing" example configures it
 * between {@code swr_alloc()} and {@code swr_init} through
 * {@code av_opt_set_int} and {@code av_opt_set_sample_fmt}.
 *
 * <p>NOTE(review): that example calls {@code swr_init(swrCtx)},
 * {@code swr_convert(swrCtx, ...)} and {@code swr_free(swrCtx)} with the
 * context as an explicit first argument, whereas they are declared as
 * instance methods here — confirm against the bindings.
 */
public class SwrContext extends Pointer {
    /**
     * Allocate resampling context
     * @return New resampling context
     */
    public static native SwrContext swr_alloc();
    
    /**
     * Initialize resampling context
     * @return 0 on success, negative on error
     */
    public native int swr_init();
    
    /**
     * Convert audio samples
     * @param out Output audio data
     * @param out_count Output sample count (presumably the capacity of
     *                  {@code out} in samples per channel — TODO confirm)
     * @param in Input audio data
     * @param in_count Input sample count
     * @return Number of output samples generated
     */
    public native int swr_convert(PointerPointer out, int out_count,
        PointerPointer in, int in_count);
    
    /**
     * Free resampling context
     */
    public native void swr_free();
}

Filtering

Apply audio and video filters for processing.

/**
 * Filter graph for processing chains.
 *
 * <p>Covers graph allocation and release, adding filter instances to the
 * graph, and configuring the assembled graph. The {@code AVFilter} type used
 * by {@code avfilter_graph_alloc_filter} is not declared in this document.
 */
public class AVFilterGraph extends Pointer {
    /**
     * Allocate filter graph
     * @return New filter graph
     */
    public static native AVFilterGraph avfilter_graph_alloc();
    
    /**
     * Free filter graph
     */
    public native void avfilter_graph_free();
    
    /**
     * Configure filter graph
     * @param log_ctx Logging context
     * @return 0 on success, negative on error
     */
    public native int avfilter_graph_config(Pointer log_ctx);
    
    /**
     * Add filter to graph
     * @param filter Filter to add
     * @param name Filter instance name
     * @param args Filter arguments
     * @param log_ctx Logging context
     * @return Filter context or null on error; check the result before use
     */
    public native AVFilterContext avfilter_graph_alloc_filter(AVFilter filter,
        String name, String args, Pointer log_ctx);
}

/**
 * Filter context.
 *
 * <p>Represents one filter instance, as returned by
 * {@code AVFilterGraph.avfilter_graph_alloc_filter}. It can be initialized
 * from an argument string and exposes its input and output pads by index.
 * The {@code AVFilterPad} type is not declared in this document.
 */
public class AVFilterContext extends Pointer {
    /**
     * Initialize filter
     * @param args Filter arguments
     * @param opaque User data
     * @return 0 on success, negative on error
     */
    public native int avfilter_init_str(String args, Pointer opaque);
    
    /**
     * Get filter input pad
     * @param index Input index
     * @return Input pad
     */
    public native AVFilterPad input(int index);
    
    /**
     * Get filter output pad
     * @param index Output index
     * @return Output pad
     */
    public native AVFilterPad output(int index);
}

Usage Examples

Video File Information

import org.bytedeco.ffmpeg.avformat.*;
import org.bytedeco.ffmpeg.avcodec.*;
import static org.bytedeco.ffmpeg.global.avformat.*;
import static org.bytedeco.ffmpeg.global.avcodec.*;

/**
 * Prints container-level and per-stream metadata of a media file to standard
 * output. Failures are reported on standard error and never thrown.
 */
public class VideoInfo {
    static {
        Loader.load(avformat.class);
        Loader.load(avcodec.class);
    }

    /**
     * Opens {@code filename}, probes its streams, and prints the container
     * format, the duration, the stream count and a short description of every
     * video and audio stream.
     *
     * @param filename path or URL of the media file to inspect
     */
    public static void getVideoInfo(String filename) {
        try (PointerScope scope = new PointerScope()) {
            AVFormatContext formatCtx = avformat_alloc_context();

            // Open input file. When opening fails FFmpeg has already released
            // the user-supplied context, so there is nothing to close here.
            if (avformat_open_input(formatCtx, filename, null, null) != 0) {
                System.err.println("Could not open file: " + filename);
                return;
            }

            // From here on the input is open: every exit path, including the
            // early return below, must go through avformat_close_input.
            try {
                // Find stream information
                if (avformat_find_stream_info(formatCtx, (AVDictionary) null) < 0) {
                    System.err.println("Could not find stream information");
                    return;
                }

                printContainerInfo(formatCtx);

                // Iterate through streams (count hoisted: it does not change here)
                int streamCount = formatCtx.nb_streams();
                for (int i = 0; i < streamCount; i++) {
                    printStreamInfo(i, formatCtx.streams(i));
                }
            } finally {
                avformat_close_input(formatCtx);
            }
        }
    }

    /** Prints the format name, duration and stream count of an opened container. */
    private static void printContainerInfo(AVFormatContext formatCtx) {
        System.out.println("Format: " + formatCtx.iformat().long_name().getString());

        // Containers without a known duration report AV_NOPTS_VALUE; dividing
        // that sentinel by AV_TIME_BASE would print a meaningless number.
        long duration = formatCtx.duration();
        if (duration == AV_NOPTS_VALUE) {
            System.out.println("Duration: unknown");
        } else {
            System.out.println("Duration: " + duration / AV_TIME_BASE + " seconds");
        }

        System.out.println("Streams: " + formatCtx.nb_streams());
    }

    /** Prints a short description of one stream; streams that are neither video nor audio are skipped. */
    private static void printStreamInfo(int index, AVStream stream) {
        AVCodecParameters codecpar = stream.codecpar();
        int mediaType = codecpar.codec_type();

        if (mediaType == AVMEDIA_TYPE_VIDEO) {
            System.out.println("Video stream " + index + ":");
            System.out.println("  Resolution: " + codecpar.width() + "x" + codecpar.height());
            System.out.println("  Codec: " + avcodec_get_name(codecpar.codec_id()).getString());
        } else if (mediaType == AVMEDIA_TYPE_AUDIO) {
            System.out.println("Audio stream " + index + ":");
            System.out.println("  Sample rate: " + codecpar.sample_rate() + " Hz");
            System.out.println("  Channels: " + codecpar.channels());
            System.out.println("  Codec: " + avcodec_get_name(codecpar.codec_id()).getString());
        }
    }
}

Video Frame Extraction

import org.bytedeco.ffmpeg.avformat.*;
import org.bytedeco.ffmpeg.avcodec.*;
import org.bytedeco.ffmpeg.swscale.*;
import static org.bytedeco.ffmpeg.global.avformat.*;
import static org.bytedeco.ffmpeg.global.avcodec.*;
import static org.bytedeco.ffmpeg.global.swscale.*;

/**
 * Decodes the first video stream of a file and converts every decoded frame
 * to packed RGB24. Failures are reported on standard error and never thrown.
 */
public class FrameExtractor {
    /**
     * Decodes all frames of the first video stream in {@code inputFile},
     * converts each one to RGB24 and logs its running index.
     *
     * @param inputFile     path or URL of the media file to read
     * @param outputPattern intended naming pattern for saved frames; currently
     *                      unused because writing image files is left out of
     *                      this example
     */
    public static void extractFrames(String inputFile, String outputPattern) {
        try (PointerScope scope = new PointerScope()) {
            AVFormatContext formatCtx = avformat_alloc_context();

            // Open input file. When opening fails FFmpeg has already released
            // the user-supplied context, so nothing needs closing on this path.
            if (avformat_open_input(formatCtx, inputFile, null, null) != 0) {
                System.err.println("Could not open input file");
                return;
            }

            AVCodecContext codecCtx = null;
            SwsContext swsCtx = null;
            BytePointer buffer = null;
            AVFrame frame = av_frame_alloc();
            AVFrame rgbFrame = av_frame_alloc();
            AVPacket packet = av_packet_alloc();

            // Everything allocated above or below is released in the finally
            // block, so the early returns cannot leak native memory.
            try {
                if (frame == null || rgbFrame == null || packet == null) {
                    System.err.println("Could not allocate frame or packet");
                    return;
                }

                if (avformat_find_stream_info(formatCtx, (AVDictionary) null) < 0) {
                    System.err.println("Could not find stream information");
                    return;
                }

                int videoStreamIndex = findVideoStream(formatCtx);
                if (videoStreamIndex == -1) {
                    System.err.println("No video stream found");
                    return;
                }

                AVStream videoStream = formatCtx.streams(videoStreamIndex);
                AVCodec codec = avcodec_find_decoder(videoStream.codecpar().codec_id());
                if (codec == null) {
                    System.err.println("No decoder available for video stream");
                    return;
                }

                codecCtx = avcodec_alloc_context3(codec);
                if (codecCtx == null
                        || avcodec_parameters_to_context(codecCtx, videoStream.codecpar()) < 0
                        || avcodec_open2(codecCtx, codec, (AVDictionary) null) < 0) {
                    System.err.println("Could not open video decoder");
                    return;
                }

                // Setup scaling context for RGB conversion
                swsCtx = sws_getContext(
                    codecCtx.width(), codecCtx.height(), codecCtx.pix_fmt(),
                    codecCtx.width(), codecCtx.height(), AV_PIX_FMT_RGB24,
                    SWS_BILINEAR, null, null, (DoublePointer) null
                );
                if (swsCtx == null) {
                    System.err.println("Could not create scaling context");
                    return;
                }

                // Allocate RGB frame buffer
                int numBytes = av_image_get_buffer_size(AV_PIX_FMT_RGB24,
                    codecCtx.width(), codecCtx.height(), 1);
                if (numBytes < 0) {
                    System.err.println("Could not compute RGB buffer size");
                    return;
                }
                buffer = new BytePointer(av_malloc(numBytes));
                if (buffer.isNull()) {
                    System.err.println("Could not allocate RGB buffer");
                    return;
                }
                av_image_fill_arrays(rgbFrame.data(), rgbFrame.linesize(),
                    buffer, AV_PIX_FMT_RGB24, codecCtx.width(), codecCtx.height(), 1);

                int frameNumber = 0;

                // Read frames
                while (av_read_frame(formatCtx, packet) >= 0) {
                    if (packet.stream_index() == videoStreamIndex
                            && avcodec_send_packet(codecCtx, packet) >= 0) {
                        frameNumber = receiveFrames(codecCtx, swsCtx, frame, rgbFrame, frameNumber);
                    }
                    av_packet_unref(packet);
                }

                // A null packet puts the decoder into draining mode so that the
                // frames it is still holding back are delivered as well.
                if (avcodec_send_packet(codecCtx, (AVPacket) null) >= 0) {
                    receiveFrames(codecCtx, swsCtx, frame, rgbFrame, frameNumber);
                }
            } finally {
                // Cleanup
                if (swsCtx != null) {
                    sws_freeContext(swsCtx);
                }
                if (buffer != null) {
                    av_free(buffer);
                }
                if (codecCtx != null) {
                    avcodec_free_context(codecCtx);
                }
                if (packet != null) {
                    av_packet_free(packet);
                }
                if (rgbFrame != null) {
                    av_frame_free(rgbFrame);
                }
                if (frame != null) {
                    av_frame_free(frame);
                }
                avformat_close_input(formatCtx);
            }
        }
    }

    /** Returns the index of the first video stream, or -1 when the container has none. */
    private static int findVideoStream(AVFormatContext formatCtx) {
        int streamCount = formatCtx.nb_streams();
        for (int i = 0; i < streamCount; i++) {
            if (formatCtx.streams(i).codecpar().codec_type() == AVMEDIA_TYPE_VIDEO) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Pulls every frame the decoder currently has ready, converts each one to
     * RGB and logs it. Returns the frame counter after the frames handled here.
     */
    private static int receiveFrames(AVCodecContext codecCtx, SwsContext swsCtx,
            AVFrame frame, AVFrame rgbFrame, int frameNumber) {
        int next = frameNumber;
        while (avcodec_receive_frame(codecCtx, frame) >= 0) {
            // Convert to RGB
            sws_scale(swsCtx, frame.data(), frame.linesize(),
                0, codecCtx.height(), rgbFrame.data(), rgbFrame.linesize());

            // Save frame (simplified - would need actual image saving logic)
            System.out.println("Extracted frame " + next);
            next++;
        }
        return next;
    }
}

Audio Processing

import org.bytedeco.ffmpeg.avformat.*;
import org.bytedeco.ffmpeg.avcodec.*;
import org.bytedeco.ffmpeg.swresample.*;
import static org.bytedeco.ffmpeg.global.avformat.*;
import static org.bytedeco.ffmpeg.global.avcodec.*;
import static org.bytedeco.ffmpeg.global.swresample.*;

/**
 * Decodes the first audio stream of a file and resamples it to 44.1 kHz
 * stereo signed 16-bit. Failures are reported on standard error and never
 * thrown.
 */
public class AudioProcessor {
    /** Size in bytes of one interleaved output sample: 2 channels x 2 bytes (S16). */
    private static final int OUT_BYTES_PER_SAMPLE = 4;

    /**
     * Decodes and resamples every audio frame of the first audio stream in
     * {@code inputFile}, logging the number of samples produced per frame.
     *
     * @param inputFile path or URL of the media file to read
     */
    public static void processAudio(String inputFile) {
        try (PointerScope scope = new PointerScope()) {
            AVFormatContext formatCtx = avformat_alloc_context();

            // Open input. When opening fails FFmpeg has already released the
            // user-supplied context, so nothing needs closing on this path.
            if (avformat_open_input(formatCtx, inputFile, null, null) != 0) {
                System.err.println("Could not open input file");
                return;
            }

            AVCodecContext codecCtx = null;
            SwrContext swrCtx = null;
            AVFrame frame = null;
            AVPacket packet = null;
            OutputBuffer output = new OutputBuffer();

            // Every native resource acquired below is released in the finally
            // block, so the early returns cannot leak.
            try {
                if (avformat_find_stream_info(formatCtx, (AVDictionary) null) < 0) {
                    System.err.println("Could not find stream information");
                    return;
                }

                // Without this check a file lacking audio would index stream -1.
                int audioStreamIndex = findAudioStream(formatCtx);
                if (audioStreamIndex == -1) {
                    System.err.println("No audio stream found");
                    return;
                }

                AVStream audioStream = formatCtx.streams(audioStreamIndex);
                AVCodec codec = avcodec_find_decoder(audioStream.codecpar().codec_id());
                if (codec == null) {
                    System.err.println("No decoder available for audio stream");
                    return;
                }

                codecCtx = avcodec_alloc_context3(codec);
                if (codecCtx == null
                        || avcodec_parameters_to_context(codecCtx, audioStream.codecpar()) < 0
                        || avcodec_open2(codecCtx, codec, (AVDictionary) null) < 0) {
                    System.err.println("Could not open audio decoder");
                    return;
                }

                // Setup resampling for consistent output format
                swrCtx = swr_alloc();
                if (swrCtx == null) {
                    System.err.println("Could not allocate resampler");
                    return;
                }
                av_opt_set_int(swrCtx, "in_channel_layout", codecCtx.channel_layout(), 0);
                av_opt_set_int(swrCtx, "out_channel_layout", AV_CH_LAYOUT_STEREO, 0);
                av_opt_set_int(swrCtx, "in_sample_rate", codecCtx.sample_rate(), 0);
                av_opt_set_int(swrCtx, "out_sample_rate", 44100, 0);
                av_opt_set_sample_fmt(swrCtx, "in_sample_fmt", codecCtx.sample_fmt(), 0);
                av_opt_set_sample_fmt(swrCtx, "out_sample_fmt", AV_SAMPLE_FMT_S16, 0);
                if (swr_init(swrCtx) < 0) {
                    System.err.println("Could not initialize resampler");
                    return;
                }

                frame = av_frame_alloc();
                packet = av_packet_alloc();
                if (frame == null || packet == null) {
                    System.err.println("Could not allocate frame or packet");
                    return;
                }

                // Process audio packets
                while (av_read_frame(formatCtx, packet) >= 0) {
                    if (packet.stream_index() == audioStreamIndex
                            && avcodec_send_packet(codecCtx, packet) >= 0) {
                        resampleDecodedFrames(codecCtx, swrCtx, frame, output);
                    }
                    av_packet_unref(packet);
                }

                // A null packet drains the frames the decoder still holds.
                if (avcodec_send_packet(codecCtx, (AVPacket) null) >= 0) {
                    resampleDecodedFrames(codecCtx, swrCtx, frame, output);
                }

                flushResampler(swrCtx, output);
            } finally {
                // Cleanup
                output.release();
                if (swrCtx != null) {
                    swr_free(swrCtx);
                }
                if (codecCtx != null) {
                    avcodec_free_context(codecCtx);
                }
                if (packet != null) {
                    av_packet_free(packet);
                }
                if (frame != null) {
                    av_frame_free(frame);
                }
                avformat_close_input(formatCtx);
            }
        }
    }

    /** Returns the index of the first audio stream, or -1 when the container has none. */
    private static int findAudioStream(AVFormatContext formatCtx) {
        int streamCount = formatCtx.nb_streams();
        for (int i = 0; i < streamCount; i++) {
            if (formatCtx.streams(i).codecpar().codec_type() == AVMEDIA_TYPE_AUDIO) {
                return i;
            }
        }
        return -1;
    }

    /** Resamples every frame the decoder currently has ready and logs the sample counts. */
    private static void resampleDecodedFrames(AVCodecContext codecCtx, SwrContext swrCtx,
            AVFrame frame, OutputBuffer output) {
        while (avcodec_receive_frame(codecCtx, frame) >= 0) {
            // Size the output from the resampler's own upper bound: a fixed
            // capacity is too small when upsampling, and the surplus would
            // pile up inside the resampler instead of being delivered.
            int needed = swr_get_out_samples(swrCtx, frame.nb_samples());
            if (needed < 0 || !output.ensureCapacity(Math.max(needed, 1))) {
                System.err.println("Could not allocate resampling buffer");
                return;
            }

            // Resample audio frame
            int outputSamples = swr_convert(swrCtx, output.planes, output.capacity,
                frame.data(), frame.nb_samples());
            if (outputSamples < 0) {
                System.err.println("Error while resampling audio");
                return;
            }

            System.out.println("Processed " + outputSamples + " audio samples");
        }
    }

    /** Delivers the samples still buffered inside the resampler once all input has been consumed. */
    private static void flushResampler(SwrContext swrCtx, OutputBuffer output) {
        int pending = swr_get_out_samples(swrCtx, 0);
        if (pending <= 0 || !output.ensureCapacity(pending)) {
            return;
        }
        int outputSamples;
        // Null input with a zero count asks the resampler to emit its tail.
        while ((outputSamples = swr_convert(swrCtx, output.planes, output.capacity,
                (PointerPointer) null, 0)) > 0) {
            System.out.println("Processed " + outputSamples + " audio samples");
        }
    }

    /**
     * Reusable interleaved output buffer. It is allocated once and grown on
     * demand, rather than allocating and freeing native memory for every frame.
     */
    private static final class OutputBuffer {
        /** Single-plane pointer array handed to {@code swr_convert}. */
        private final PointerPointer planes = new PointerPointer(1);
        private BytePointer data;
        /** Capacity of {@link #data} in samples per channel. */
        private int capacity;

        /** Grows the buffer to hold at least {@code samples} samples; returns false when allocation fails. */
        boolean ensureCapacity(int samples) {
            if (data != null && samples <= capacity) {
                return true;
            }
            release();
            BytePointer fresh = new BytePointer(av_malloc((long) samples * OUT_BYTES_PER_SAMPLE));
            if (fresh.isNull()) {
                return false;
            }
            data = fresh;
            capacity = samples;
            planes.put(0, data);
            return true;
        }

        /** Frees the native buffer if one is allocated; safe to call repeatedly. */
        void release() {
            if (data != null) {
                av_free(data);
                data = null;
                capacity = 0;
            }
        }
    }
}

Install with Tessl CLI

npx tessl i tessl/maven-org-bytedeco--javacpp-presets-platform

docs

computer-vision.md

gpu-computing.md

index.md

machine-learning.md

multimedia.md

scientific-computing.md

text-processing.md

tile.json