CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/maven-org-apache-tika--tika-core

Apache Tika Core provides the foundational APIs for detecting and extracting metadata and structured text content from various document formats.

Pending
Overview
Eval results
Files

docs/exceptions.md

Exceptions

Exception hierarchy for handling various error conditions in Tika operations, including document encryption, unsupported formats, corruption detection, and processing limitations.

Capabilities

Base Exception Classes

TikaException

Base exception class for all Tika-specific errors providing common error handling patterns and cause chaining.

/**
 * Base exception for all Tika-related errors and exceptional conditions.
 *
 * <p>Extends {@link Exception} directly, so it is a checked exception: callers
 * must either catch it or declare it. Only signatures are listed here; the
 * member bodies are omitted from this reference.
 */
public class TikaException extends Exception {
    /**
     * Creates TikaException with error message
     * @param message Description of the error condition
     */
    public TikaException(String message);
    
    /**
     * Creates TikaException with error message and cause
     * @param message Description of the error condition
     * @param cause Underlying exception that caused this error
     */
    public TikaException(String message, Throwable cause);
    
    /**
     * Creates TikaException with cause only
     * @param cause Underlying exception that caused this error
     */
    public TikaException(Throwable cause);
    
    /**
     * Gets the error message including cause information.
     * NOTE(review): how the cause is folded into the message is not shown in
     * this listing — confirm the exact format before parsing the text.
     * @return String containing detailed error description
     */
    @Override
    public String getMessage();
}

Document Security Exceptions

EncryptedDocumentException

Exception thrown when attempting to process password-protected or encrypted documents without proper credentials.

/**
 * Exception for encrypted or password-protected documents.
 *
 * <p>Subclass of {@code TikaException}, so a {@code catch (TikaException e)}
 * clause also receives it; place a dedicated catch for this type first when
 * encryption needs separate handling. Only signatures are listed here.
 */
public class EncryptedDocumentException extends TikaException {
    /**
     * Creates exception for encrypted document
     * @param message Description of encryption issue
     */
    public EncryptedDocumentException(String message);
    
    /**
     * Creates exception with message and underlying cause
     * @param message Description of encryption issue
     * @param cause Underlying exception from encryption handling
     */
    public EncryptedDocumentException(String message, Throwable cause);
    
    /**
     * Creates exception from underlying encryption error
     * @param cause Underlying exception from encryption system
     */
    public EncryptedDocumentException(Throwable cause);
}

AccessPermissionException

Exception for documents that deny access permissions for specific operations like text extraction or printing.

/**
 * Exception for documents with access permission restrictions.
 *
 * <p>Subclass of {@code TikaException}. Unlike the other exceptions in this
 * listing that offer one, no cause-only constructor is listed for this type.
 * Only signatures are listed here.
 */
public class AccessPermissionException extends TikaException {
    /**
     * Creates exception for access permission denial
     * @param message Description of permission restriction
     */
    public AccessPermissionException(String message);
    
    /**
     * Creates exception with message and underlying cause
     * @param message Description of permission restriction
     * @param cause Underlying exception from permission system
     */
    public AccessPermissionException(String message, Throwable cause);
    
    /**
     * Gets the specific permission that was denied.
     * NOTE(review): neither listed constructor accepts a permission value, so
     * where this value comes from (and whether it can be null) is not shown —
     * confirm before relying on it.
     * @return String describing denied permission (e.g., "text extraction", "printing")
     */
    public String getDeniedPermission();
}

Format and Corruption Exceptions

UnsupportedFormatException

Exception thrown when encountering document formats that are not supported by available parsers.

/**
 * Exception for unsupported or unrecognized document formats.
 *
 * <p>Subclass of {@code TikaException}. The offending media type is only
 * available when the exception was created with one; see
 * {@link #getMediaType()}. Only signatures are listed here.
 */
public class UnsupportedFormatException extends TikaException {
    /**
     * Creates exception for unsupported format.
     * No media type is supplied through this constructor.
     * @param message Description of format issue
     */
    public UnsupportedFormatException(String message);
    
    /**
     * Creates exception with detected media type
     * @param mediaType MediaType that was detected but not supported
     */
    public UnsupportedFormatException(MediaType mediaType);
    
    /**
     * Creates exception with message and underlying cause.
     * No media type is supplied through this constructor.
     * @param message Description of format issue
     * @param cause Underlying exception from format handling
     */
    public UnsupportedFormatException(String message, Throwable cause);
    
    /**
     * Gets the media type that caused the exception.
     * Callers must null-check the result before dereferencing it.
     * @return MediaType that was unsupported, or null if unknown
     */
    public MediaType getMediaType();
}

CorruptedFileException

Exception for documents that are corrupted, malformed, or contain invalid data structures.

/**
 * Exception for corrupted, malformed, or invalid documents.
 *
 * <p>Subclass of {@code TikaException}. Only signatures are listed here.
 */
public class CorruptedFileException extends TikaException {
    /**
     * Creates exception for corrupted file
     * @param message Description of corruption detected
     */
    public CorruptedFileException(String message);
    
    /**
     * Creates exception with message and underlying cause
     * @param message Description of corruption detected
     * @param cause Underlying exception from corruption detection
     */
    public CorruptedFileException(String message, Throwable cause);
    
    /**
     * Creates exception from underlying parsing error indicating corruption
     * @param cause Underlying exception from parser
     */
    public CorruptedFileException(Throwable cause);
    
    /**
     * Gets the type of corruption detected.
     * NOTE(review): none of the listed constructors accepts a corruption type,
     * so how this value is derived (and whether it can be null) is not shown —
     * confirm before displaying it to users.
     * @return String describing corruption type (e.g., "invalid header", "truncated")
     */
    public String getCorruptionType();
}

ZeroByteFileException

Exception for files that contain no data or are completely empty.

/**
 * Exception for empty files with zero bytes of content.
 *
 * <p>Subclass of {@code TikaException}. The file path is optional: it is only
 * supplied through the two-argument constructor. Only signatures are listed
 * here.
 */
public class ZeroByteFileException extends TikaException {
    /**
     * Creates exception for zero-byte file.
     * No file path is supplied through this constructor.
     * @param message Description of empty file condition
     */
    public ZeroByteFileException(String message);
    
    /**
     * Creates exception with file path information
     * @param message Description of empty file condition
     * @param filePath Path to the empty file
     */
    public ZeroByteFileException(String message, String filePath);
    
    /**
     * Gets the path of the empty file.
     * Callers must null-check the result before using it.
     * @return String containing file path, or null if unknown
     */
    public String getFilePath();
}

Processing Limit Exceptions

WriteLimitReachedException

Exception thrown when output size exceeds configured limits during content extraction or conversion.

/**
 * Exception for exceeding configured write limits during processing.
 *
 * <p>Listed here as a subclass of {@code TikaException}. Only signatures are
 * listed; the member bodies are omitted.
 * NOTE(review): confirm the superclass against the tika-core Javadoc for the
 * release in use before relying on {@code catch (TikaException e)} to receive
 * this type.
 */
public class WriteLimitReachedException extends TikaException {
    /**
     * Creates exception for write limit exceeded.
     * No limit or size is supplied through this constructor.
     * @param message Description of limit violation
     */
    public WriteLimitReachedException(String message);
    
    /**
     * Creates exception with limit information.
     * No actual size is supplied through this constructor.
     * @param message Description of limit violation
     * @param limit Maximum allowed size in characters/bytes
     */
    public WriteLimitReachedException(String message, long limit);
    
    /**
     * Creates exception with limit and actual size
     * @param message Description of limit violation
     * @param limit Maximum allowed size
     * @param actualSize Actual size that exceeded limit
     */
    public WriteLimitReachedException(String message, long limit, long actualSize);
    
    /**
     * Gets the configured write limit.
     * NOTE(review): the value returned when the exception was built without a
     * limit is not stated in this listing — confirm the sentinel.
     * @return Maximum allowed size in characters/bytes
     */
    public long getLimit();
    
    /**
     * Gets the actual size that exceeded the limit.
     * NOTE(review): the value returned when the exception was built without an
     * actual size is not stated in this listing — confirm the sentinel.
     * @return Actual size processed when limit was exceeded
     */
    public long getActualSize();
}

Embedded Document Exceptions

EmbeddedDocumentExtractorException

Exception for errors occurring during extraction and processing of embedded documents.

/**
 * Exception for errors in embedded document extraction and processing.
 *
 * <p>Subclass of {@code TikaException}. The embedded document name is only
 * supplied through the three-argument constructor. Only signatures are listed
 * here.
 */
public class EmbeddedDocumentExtractorException extends TikaException {
    /**
     * Creates exception for embedded document error.
     * No embedded document name is supplied through this constructor.
     * @param message Description of extraction issue
     */
    public EmbeddedDocumentExtractorException(String message);
    
    /**
     * Creates exception with message and underlying cause.
     * No embedded document name is supplied through this constructor.
     * @param message Description of extraction issue
     * @param cause Underlying exception from embedded document processing
     */
    public EmbeddedDocumentExtractorException(String message, Throwable cause);
    
    /**
     * Creates exception with embedded document information
     * @param message Description of extraction issue
     * @param embeddedName Name or identifier of embedded document
     * @param cause Underlying exception
     */
    public EmbeddedDocumentExtractorException(String message, String embeddedName, Throwable cause);
    
    /**
     * Gets the name of the embedded document that caused the error.
     * NOTE(review): the result for exceptions created without a name is not
     * stated in this listing — presumably null; confirm and null-check.
     * @return String containing embedded document name/identifier
     */
    public String getEmbeddedDocumentName();
}

Configuration Exceptions

TikaConfigException

Exception for configuration-related errors during Tika setup and initialization.

/**
 * Exception for configuration errors during Tika setup and initialization.
 *
 * <p>Subclass of {@code TikaException}. Only signatures are listed here.
 */
public class TikaConfigException extends TikaException {
    /**
     * Creates exception for configuration error
     * @param message Description of configuration issue
     */
    public TikaConfigException(String message);
    
    /**
     * Creates exception with message and underlying cause
     * @param message Description of configuration issue
     * @param cause Underlying exception from configuration processing
     */
    public TikaConfigException(String message, Throwable cause);
    
    /**
     * Creates exception from underlying configuration error
     * @param cause Underlying exception from configuration system
     */
    public TikaConfigException(Throwable cause);
    
    /**
     * Gets the configuration parameter that caused the error.
     * Callers must null-check the result.
     * NOTE(review): none of the listed constructors accepts a parameter name,
     * so how a non-null value gets set is not shown — confirm.
     * @return String containing parameter name, or null if not parameter-specific
     */
    public String getConfigParameter();
}

Parser-Specific Exceptions

UnsupportedOperationException

Exception for operations that are not supported by specific parser implementations.

/**
 * Exception for unsupported operations in parser implementations.
 *
 * <p>This type shares its simple name with
 * {@code java.lang.UnsupportedOperationException}, which is implicitly
 * imported into every compilation unit and is unchecked. This one extends
 * {@code TikaException} and is therefore checked. Refer to it through an
 * explicit single-type import or its fully-qualified name; otherwise the
 * unqualified name resolves to the {@code java.lang} class. Only signatures
 * are listed here.
 */
public class UnsupportedOperationException extends TikaException {
    /**
     * Creates exception for unsupported operation.
     * No parser class is supplied through this constructor.
     * @param message Description of unsupported operation
     */
    public UnsupportedOperationException(String message);
    
    /**
     * Creates exception with operation and parser information
     * @param operation Description of attempted operation
     * @param parserClass Class of parser that doesn't support operation
     */
    public UnsupportedOperationException(String operation, Class<?> parserClass);
    
    /**
     * Gets the operation that was not supported.
     * NOTE(review): the result for exceptions created through the
     * message-only constructor is not stated in this listing — confirm.
     * @return String describing attempted operation
     */
    public String getOperation();
    
    /**
     * Gets the parser class that doesn't support the operation.
     * NOTE(review): the result for exceptions created through the
     * message-only constructor is not stated — presumably null; confirm.
     * @return Class of parser implementation
     */
    public Class<?> getParserClass();
}

Exception Handling Patterns

Basic Exception Handling

// Parse one document and report each failure category on stderr.
// The specialised exception types all extend TikaException, so their catch
// clauses have to appear before the TikaException clause; IOException and
// SAXException are unrelated to that hierarchy and are handled last.
try {
    AutoDetectParser autoParser = new AutoDetectParser();
    BodyContentHandler textHandler = new BodyContentHandler();
    Metadata docMetadata = new Metadata();
    ParseContext parseContext = new ParseContext();

    autoParser.parse(inputStream, textHandler, docMetadata, parseContext);
    // The handler accumulates the extracted body text during parse()
    String extractedText = textHandler.toString();

} catch (EncryptedDocumentException encrypted) {
    System.err.println("Document is password protected: " + encrypted.getMessage());

} catch (UnsupportedFormatException unsupported) {
    System.err.println("Unsupported format: " + unsupported.getMediaType());

} catch (CorruptedFileException corrupted) {
    System.err.println("Corrupted file detected: " + corrupted.getCorruptionType());

} catch (WriteLimitReachedException limitHit) {
    System.err.println("Output size exceeded limit: " + limitHit.getLimit() + " bytes");

} catch (TikaException tikaFailure) {
    // Catch-all for every remaining Tika-specific failure
    System.err.println("General Tika error: " + tikaFailure.getMessage());

} catch (IOException ioFailure) {
    System.err.println("I/O error: " + ioFailure.getMessage());

} catch (SAXException saxFailure) {
    System.err.println("XML parsing error: " + saxFailure.getMessage());
}

Comprehensive Error Analysis

/**
 * Example processor that parses a document and converts every failure into an
 * error code plus message on a {@code ProcessingResult} instead of letting the
 * exception propagate.
 */
public class DocumentProcessor {
    
    /**
     * Parses the given stream and records content, metadata and any error on
     * the returned result. This method never throws: every exception is mapped
     * to an error code on the result.
     *
     * @param input document stream to parse
     * @param filename resource name recorded in the metadata before parsing
     * @return result carrying either the extracted content or an error code/message
     */
    public ProcessingResult processDocument(InputStream input, String filename) {
        ProcessingResult result = new ProcessingResult();
        
        try {
            AutoDetectParser parser = new AutoDetectParser();
            // The handler's write limit counts extracted characters, not bytes
            BodyContentHandler handler = new BodyContentHandler(1000000); // 1,000,000-character limit
            Metadata metadata = new Metadata();
            metadata.set(Metadata.RESOURCE_NAME_KEY, filename);
            
            parser.parse(input, handler, metadata, new ParseContext());
            
            result.setContent(handler.toString());
            result.setMetadata(metadata);
            result.setSuccess(true);
            
            // NOTE(review): confirm the handler exposes isWriteLimitReached(); if the
            // limit surfaces as WriteLimitReachedException instead, this branch never
            // runs and the SIZE_LIMIT catch below is the only truncation path.
            if (handler.isWriteLimitReached()) {
                result.addWarning("Content truncated due to size limit");
            }
            
        } catch (EncryptedDocumentException e) {
            result.setError("ENCRYPTED", "Document requires password: " + e.getMessage());
            
        } catch (UnsupportedFormatException e) {
            // The media type is nullable, so guard before rendering it
            MediaType type = e.getMediaType();
            result.setError("UNSUPPORTED_FORMAT", 
                "Format not supported: " + (type != null ? type.toString() : "unknown"));
                
        } catch (CorruptedFileException e) {
            result.setError("CORRUPTED", "File corruption detected: " + e.getCorruptionType());
            
        } catch (ZeroByteFileException e) {
            // The file path is nullable as well: fall back to the caller-supplied
            // name, then to "unknown", rather than printing the literal "null"
            String path = e.getFilePath();
            if (path == null) {
                path = (filename != null) ? filename : "unknown";
            }
            result.setError("EMPTY", "File is empty: " + path);
            
        } catch (AccessPermissionException e) {
            result.setError("PERMISSION_DENIED", 
                "Access denied for: " + e.getDeniedPermission());
                
        } catch (WriteLimitReachedException e) {
            result.setError("SIZE_LIMIT", 
                String.format("Size limit exceeded: %d > %d", e.getActualSize(), e.getLimit()));
                
        } catch (EmbeddedDocumentExtractorException e) {
            result.setError("EMBEDDED_ERROR", 
                "Embedded document error: " + e.getEmbeddedDocumentName());
                
        } catch (TikaException e) {
            // Must stay after the specialised types above, which all extend TikaException
            result.setError("TIKA_ERROR", "Processing error: " + e.getMessage());
            
        } catch (Exception e) {
            // Boundary catch: I/O, SAX and runtime failures all end up here
            result.setError("UNKNOWN", "Unexpected error: " + e.getMessage());
        }
        
        return result;
    }
}

Exception Recovery Strategies

/**
 * Example wrapper that extracts text and falls back to alternative strategies
 * when the first attempt fails with a recognisable Tika exception.
 */
public class RobustDocumentParser {
    
    /** Marker appended when extraction stopped at the handler's write limit. */
    private static final String TRUNCATION_NOTICE = "\n[Content truncated]";
    
    /**
     * Extracts text from the document, retrying with fallbacks on failure.
     *
     * <p>The document is read fully into memory first. A failed parse leaves
     * the caller's stream partially consumed, so each retry must start from a
     * fresh copy of the bytes rather than from the original stream. The cost
     * is holding the whole document on the heap for the duration of the call.
     *
     * @param input document stream; consumed but not closed
     * @return extracted text, suffixed with a truncation marker when the write
     *         limit cut extraction short
     * @throws TikaException if every strategy fails; I/O and SAX failures are
     *         wrapped with the original exception preserved as the cause
     */
    public String extractText(InputStream input) throws TikaException {
        try {
            byte[] document = readFully(input);
            
            // Try with full parser first
            try {
                return parseWithFullFeatures(open(document));
                
            } catch (EncryptedDocumentException e) {
                // Try common passwords
                for (String password : getCommonPasswords()) {
                    try {
                        return parseWithPassword(open(document), password);
                    } catch (Exception ignored) {
                        // Deliberate best effort: a failed candidate just moves
                        // on to the next password
                    }
                }
                throw e; // Re-throw if no password worked
                
            } catch (CorruptedFileException e) {
                // Try lenient parsing mode
                return parseWithLenientMode(open(document));
                
            } catch (UnsupportedFormatException e) {
                // Try text extraction fallback
                return tryTextExtractionFallback(open(document));
            }
        } catch (IOException | SAXException e) {
            // Keep the declared contract (TikaException only) without losing the cause
            throw new TikaException("Text extraction failed: " + e.getMessage(), e);
        }
    }
    
    /** Parses with a default, unconfigured parse context. */
    private String parseWithFullFeatures(InputStream input)
            throws IOException, SAXException, TikaException {
        return parseWith(input, new ParseContext());
    }
    
    /** Parses with a context intended to carry lenient parsing settings. */
    private String parseWithLenientMode(InputStream input)
            throws IOException, SAXException, TikaException {
        // Implementation with lenient parsing settings
        ParseContext context = new ParseContext();
        // Add lenient configuration to context
        return parseWith(input, context);
    }
    
    /**
     * Runs the auto-detecting parser with the given context. When the write
     * limit is hit, the text collected so far is returned with the truncation
     * marker instead of the exception message.
     */
    private String parseWith(InputStream input, ParseContext context)
            throws IOException, SAXException, TikaException {
        AutoDetectParser parser = new AutoDetectParser();
        BodyContentHandler handler = new BodyContentHandler();
        try {
            parser.parse(input, handler, new Metadata(), context);
        } catch (WriteLimitReachedException e) {
            // The handler still holds everything written before the limit was hit
            return handler.toString() + TRUNCATION_NOTICE;
        }
        return handler.toString();
    }
    
    /** Opens a fresh stream positioned at the start of the buffered document. */
    private static InputStream open(byte[] document) {
        return new java.io.ByteArrayInputStream(document);
    }
    
    /** Drains the stream into a byte array without closing it. */
    private static byte[] readFully(InputStream input) throws IOException {
        java.io.ByteArrayOutputStream buffer = new java.io.ByteArrayOutputStream();
        byte[] chunk = new byte[8192];
        int read;
        while ((read = input.read(chunk)) != -1) {
            buffer.write(chunk, 0, read);
        }
        return buffer.toByteArray();
    }
}

Custom Exception Creation

// Creating custom exceptions extending Tika hierarchy
/**
 * Unsupported-format error that additionally records which format version
 * was rejected.
 */
public class CustomFormatException extends UnsupportedFormatException {
    /** Version string passed at construction; never reassigned. */
    private final String version;

    /**
     * @param message description of the format problem, forwarded to the superclass
     * @param formatVersion version of the format that was rejected
     */
    public CustomFormatException(String message, String formatVersion) {
        super(message);
        version = formatVersion;
    }

    /**
     * @return the format version supplied to the constructor
     */
    public String getFormatVersion() {
        return this.version;
    }
}

// Usage in custom parser
/**
 * Example parser that rejects unsupported format versions and reports
 * corruption-related I/O failures as CorruptedFileException.
 * NOTE(review): other members a concrete parser may need (for example the
 * supported-types accessor) are not shown in this snippet — confirm against
 * the AbstractParser contract.
 */
public class CustomParser extends AbstractParser {

    /**
     * Detects the format version, fails fast when it is unsupported, then parses.
     *
     * @throws IOException for I/O failures not classified as corruption
     * @throws SAXException declared by the parser contract
     * @throws TikaException CustomFormatException for an unsupported version, or
     *         CorruptedFileException when an I/O failure indicates corruption
     */
    @Override
    public void parse(InputStream stream, ContentHandler handler,
                      Metadata metadata, ParseContext context)
            throws IOException, SAXException, TikaException {

        try {
            final String detectedVersion = detectFormatVersion(stream);
            final boolean versionSupported = isSupportedVersion(detectedVersion);

            if (!versionSupported) {
                String reason = "Unsupported format version: " + detectedVersion;
                throw new CustomFormatException(reason, detectedVersion);
            }

            // Continue with parsing...

        } catch (IOException ioFailure) {
            if (!isCorruptionError(ioFailure)) {
                throw ioFailure; // Re-throw other I/O errors
            }
            // Corruption detected: translate it, keeping the I/O failure as the cause
            throw new CorruptedFileException("Invalid file structure", ioFailure);
        }
    }
}

Install with Tessl CLI

npx tessl i tessl/maven-org-apache-tika--tika-core

docs

configuration.md

content-processing.md

detection.md

embedded-extraction.md

embedding.md

exceptions.md

index.md

io-utilities.md

language.md

metadata.md

mime-types.md

parsing.md

pipes.md

process-forking.md

rendering.md

tile.json