CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/maven-org-apache-tika--tika-core

Apache Tika Core provides the foundational APIs for detecting and extracting metadata and structured text content from various document formats.

Pending
Overview
Eval results
Files

docs/rendering.md

Document Rendering

Document rendering framework for converting documents into visual representations such as images. This framework provides flexible rendering capabilities for different document types including PDFs, with support for page-based rendering, custom render requests, and advanced result management.

Capabilities

Renderer Interface

Core interface for document rendering operations supporting various document formats and rendering requests.

/**
 * Contract for components that turn a document stream into rendered output.
 * Extends {@link Serializable}, so implementations are expected to be serializable.
 */
public interface Renderer extends Serializable {
    /**
     * Returns the set of media types supported by this renderer.
     *
     * @param context parse context for renderer configuration
     * @return immutable set of supported media types
     */
    Set<MediaType> getSupportedTypes(ParseContext context);
    
    /**
     * Renders document content based on the provided render requests.
     * The returned {@link RenderResults} is {@code Closeable}; callers should close it
     * once the rendered content has been consumed.
     *
     * @param is input stream containing document data
     * @param metadata document metadata
     * @param parseContext parsing context
     * @param requests variable number of render requests (varargs)
     * @return collection of render results
     * @throws IOException if I/O error occurs during rendering
     * @throws TikaException if rendering fails
     */
    RenderResults render(InputStream is, Metadata metadata, ParseContext parseContext,
                         RenderRequest... requests) throws IOException, TikaException;
}

Composite Renderer

Default renderer implementation that delegates to the format-specific renderer matching the document's media type.

/**
 * Renderer that composes other renderers and exposes lookup of the specific
 * renderer for a given media type. Also implements {@code Initializable}.
 */
public class CompositeRenderer implements Renderer, Initializable {
    /**
     * Creates composite renderer with service-loaded renderers.
     * NOTE(review): presumably this is Tika's own ServiceLoader (the type returned by
     * TikaConfig.getServiceLoader() in the usage example), not java.util.ServiceLoader - confirm.
     *
     * @param serviceLoader service loader for renderer discovery
     */
    public CompositeRenderer(ServiceLoader serviceLoader);
    
    /**
     * Creates composite renderer with provided renderer list.
     *
     * @param renderers list of renderers to compose
     */
    public CompositeRenderer(List<Renderer> renderers);
    
    /**
     * Gets the specific renderer for a media type.
     * Callers must handle a {@code null} return when no renderer matches.
     *
     * @param mt media type to find renderer for
     * @return renderer instance or null if not found
     */
    public Renderer getLeafRenderer(MediaType mt);
}

Render Requests

Base interface and implementations for different rendering request types.

/**
 * Base interface for rendering requests; extensible for different document types.
 * Declares no methods of its own.
 */
public interface RenderRequest {
    // Marker interface: each implementation defines its own request parameters
}

/**
 * Request for rendering specific page ranges in page-based documents.
 * Page numbers are 1-based; an end page of -1 means "all pages".
 */
public class PageRangeRequest implements RenderRequest {
    /** Constant for rendering all pages: starts at page 1 and uses -1 as the end page. */
    public static final PageRangeRequest RENDER_ALL = new PageRangeRequest(1, -1);
    
    /**
     * Creates page range request.
     *
     * @param from starting page number (1-based)
     * @param to ending page number (1-based, inclusive, -1 for all)
     */
    public PageRangeRequest(int from, int to);
    
    /** @return starting page number (1-based) */
    public int getFrom();
    
    /** @return ending page number (1-based, inclusive; -1 when the request covers all pages) */
    public int getTo();
}

Render Results

Classes for managing rendering results and output data.

/**
 * Individual render result with status, content, and metadata.
 * Implements {@code Closeable}, so instances should be closed when no longer needed.
 */
public class RenderResult implements Closeable {
    /**
     * Status enumeration for render results.
     */
    public enum STATUS {
        SUCCESS,   // Rendering completed successfully
        EXCEPTION, // Rendering failed with exception
        TIMEOUT    // Rendering timed out
    }
    
    /**
     * Creates render result.
     *
     * @param status rendering status
     * @param id unique identifier for this result
     * @param result rendered content (Path or other object)
     * @param metadata associated metadata
     */
    public RenderResult(STATUS status, int id, Object result, Metadata metadata);
    
    /**
     * @return input stream for rendered content
     * @throws IOException declared by the signature; the triggering conditions are not shown here
     */
    public InputStream getInputStream() throws IOException;
    
    /** @return associated metadata */
    public Metadata getMetadata();
    
    /** @return rendering status (one of {@link STATUS}) */
    public STATUS getStatus();
    
    /** @return unique result identifier */
    public int getId();
}

/**
 * Collection of render results with resource management.
 * Implements {@code Closeable}; use it in a try-with-resources statement or close it explicitly.
 */
public class RenderResults implements Closeable {
    /**
     * Creates render results collection.
     *
     * @param tmp temporary resources manager
     */
    public RenderResults(TemporaryResources tmp);
    
    /**
     * Adds render result to collection.
     *
     * @param result render result to add
     */
    public void add(RenderResult result);
    
    /** @return list of all render results */
    public List<RenderResult> getResults();
}

/**
 * Page-organized render results for page-based documents.
 * Extends {@link RenderResults}, adding lookup of results by page.
 */
public class PageBasedRenderResults extends RenderResults {
    /**
     * Creates page-based render results.
     *
     * @param tmp temporary resources manager
     */
    public PageBasedRenderResults(TemporaryResources tmp);
    
    /**
     * Gets render results for specific page.
     * NOTE(review): page numbering is presumably 1-based, matching PageRangeRequest - confirm.
     *
     * @param pageNumber page number to retrieve
     * @return list of render results for the page
     */
    public List<RenderResult> getPage(int pageNumber);
}

Usage Examples

Basic Document Rendering:

import org.apache.tika.renderer.*;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.parser.ParseContext;
import java.io.FileInputStream;
import java.io.InputStream;

// Setup renderer
TikaConfig config = TikaConfig.getDefaultConfig();
CompositeRenderer renderer = new CompositeRenderer(config.getServiceLoader());

// Setup document metadata
Metadata metadata = new Metadata();
metadata.set(TikaCoreProperties.TYPE, "application/pdf");

// Render all pages
try (InputStream stream = new FileInputStream("document.pdf")) {
    RenderResults results = renderer.render(stream, metadata, new ParseContext(),
                                          PageRangeRequest.RENDER_ALL);
    
    // Process results
    for (RenderResult result : results.getResults()) {
        if (result.getStatus() == RenderResult.STATUS.SUCCESS) {
            try (InputStream renderedContent = result.getInputStream()) {
                // Process rendered content (e.g., save as image)
                // renderedContent contains the visual representation
            }
        }
    }
}

Page-Specific Rendering:

// Render specific page range (pages 2-5)
PageRangeRequest pageRequest = new PageRangeRequest(2, 5);

try (InputStream stream = new FileInputStream("document.pdf")) {
    RenderResults results = renderer.render(stream, metadata, new ParseContext(), pageRequest);
    
    // Use page-based results for organized access
    if (results instanceof PageBasedRenderResults) {
        PageBasedRenderResults pageResults = (PageBasedRenderResults) results;
        
        // Get results for specific page
        List<RenderResult> page3Results = pageResults.getPage(3);
        for (RenderResult result : page3Results) {
            System.out.println("Page 3 render result ID: " + result.getId());
        }
    }
}

Custom Renderer Implementation:

/**
 * Skeleton renderer that advertises support for JPEG and PNG images.
 * The render step is a placeholder that returns an empty result collection.
 */
public class CustomImageRenderer implements Renderer {

    /**
     * Reports the media types this renderer handles.
     *
     * @param context parse context (not consulted by this implementation)
     * @return set containing the JPEG and PNG image media types
     */
    @Override
    public Set<MediaType> getSupportedTypes(ParseContext context) {
        MediaType jpeg = MediaType.image("jpeg");
        MediaType png = MediaType.image("png");
        return Set.of(jpeg, png);
    }

    /**
     * Builds the result collection for the given input.
     *
     * @param is input stream containing the image data
     * @param metadata document metadata
     * @param parseContext parsing context
     * @param requests render requests
     * @return render results backed by a fresh temporary-resources manager
     * @throws IOException declared for I/O failures during rendering
     * @throws TikaException declared for rendering failures
     */
    @Override
    public RenderResults render(InputStream is, Metadata metadata, ParseContext parseContext,
                               RenderRequest... requests) throws IOException, TikaException {
        // The results container is given its own temporary-resources manager
        TemporaryResources tmp = new TemporaryResources();
        RenderResults rendered = new RenderResults(tmp);

        // Custom rendering logic for images goes here:
        // process the input stream and add the rendered output to `rendered`

        return rendered;
    }
}

The rendering framework is designed to be extensible, allowing custom implementations for specific document types and use cases. It integrates with Tika's service loader mechanism for automatic renderer discovery and provides comprehensive resource management for temporary files and streams.

Install with Tessl CLI

npx tessl i tessl/maven-org-apache-tika--tika-core

docs

configuration.md

content-processing.md

detection.md

embedded-extraction.md

embedding.md

exceptions.md

index.md

io-utilities.md

language.md

metadata.md

mime-types.md

parsing.md

pipes.md

process-forking.md

rendering.md

tile.json