Apache Tika Core provides the foundational APIs for detecting and extracting metadata and structured text content from various document formats.
—
Exception hierarchy for handling various error conditions in Tika operations, including document encryption, unsupported formats, corruption detection, and processing limitations.
Base exception class for all Tika-specific errors providing common error handling patterns and cause chaining.
/**
 * Base exception for all Tika-related errors and exceptional conditions.
 *
 * <p>Extends {@link Exception}, so it is a checked exception. The other
 * exception types declared in this document extend it directly or
 * indirectly, which means a single {@code catch (TikaException e)} clause
 * also handles all of them.
 */
public class TikaException extends Exception {
    /**
     * Creates a TikaException with an error message.
     *
     * @param message description of the error condition
     */
    public TikaException(String message);
    /**
     * Creates a TikaException with an error message and a cause.
     *
     * @param message description of the error condition
     * @param cause underlying exception that caused this error
     */
    public TikaException(String message, Throwable cause);
    /**
     * Creates a TikaException with a cause only.
     *
     * @param cause underlying exception that caused this error
     */
    public TikaException(Throwable cause);
    /**
     * Gets the error message including cause information.
     *
     * @return string containing the detailed error description
     */
    @Override
    public String getMessage();
}
Exception thrown when attempting to process password-protected or encrypted documents without proper credentials.
/**
 * Exception for encrypted or password-protected documents.
 *
 * <p>Subclass of {@code TikaException}; when both are caught, the clause for
 * this type has to come first.
 */
public class EncryptedDocumentException extends TikaException {
    /**
     * Creates an exception for an encrypted document.
     *
     * @param message description of the encryption issue
     */
    public EncryptedDocumentException(String message);
    /**
     * Creates an exception with a message and an underlying cause.
     *
     * @param message description of the encryption issue
     * @param cause underlying exception from encryption handling
     */
    public EncryptedDocumentException(String message, Throwable cause);
    /**
     * Creates an exception from an underlying encryption error.
     *
     * @param cause underlying exception from the encryption system
     */
    public EncryptedDocumentException(Throwable cause);
}
Exception for documents that deny access permissions for specific operations like text extraction or printing.
/**
 * Exception for documents with access permission restrictions.
 *
 * <p>Subclass of {@code TikaException}.
 */
public class AccessPermissionException extends TikaException {
    /**
     * Creates an exception for an access permission denial.
     *
     * @param message description of the permission restriction
     */
    public AccessPermissionException(String message);
    /**
     * Creates an exception with a message and an underlying cause.
     *
     * @param message description of the permission restriction
     * @param cause underlying exception from the permission system
     */
    public AccessPermissionException(String message, Throwable cause);
    /**
     * Gets the specific permission that was denied.
     *
     * <p>NOTE(review): neither declared constructor takes the denied
     * permission as a separate argument, so how this value is populated is
     * not visible here - confirm before relying on it being non-null.
     *
     * @return string describing the denied permission
     *         (e.g., "text extraction", "printing")
     */
    public String getDeniedPermission();
}
Exception thrown when encountering document formats that are not supported by available parsers.
/**
 * Exception for unsupported or unrecognized document formats.
 *
 * <p>Subclass of {@code TikaException}.
 */
public class UnsupportedFormatException extends TikaException {
    /**
     * Creates an exception for an unsupported format.
     *
     * @param message description of the format issue
     */
    public UnsupportedFormatException(String message);
    /**
     * Creates an exception carrying the detected media type.
     *
     * @param mediaType media type that was detected but is not supported
     */
    public UnsupportedFormatException(MediaType mediaType);
    /**
     * Creates an exception with a message and an underlying cause.
     *
     * @param message description of the format issue
     * @param cause underlying exception from format handling
     */
    public UnsupportedFormatException(String message, Throwable cause);
    /**
     * Gets the media type that caused the exception.
     *
     * <p>Callers must handle a {@code null} result; only one of the declared
     * constructors accepts a media type.
     *
     * @return media type that was unsupported, or {@code null} if unknown
     */
    public MediaType getMediaType();
}
Exception for documents that are corrupted, malformed, or contain invalid data structures.
/**
 * Exception for corrupted, malformed, or invalid documents.
 *
 * <p>Subclass of {@code TikaException}.
 */
public class CorruptedFileException extends TikaException {
    /**
     * Creates an exception for a corrupted file.
     *
     * @param message description of the corruption detected
     */
    public CorruptedFileException(String message);
    /**
     * Creates an exception with a message and an underlying cause.
     *
     * @param message description of the corruption detected
     * @param cause underlying exception from corruption detection
     */
    public CorruptedFileException(String message, Throwable cause);
    /**
     * Creates an exception from an underlying parsing error that indicates
     * corruption.
     *
     * @param cause underlying exception from the parser
     */
    public CorruptedFileException(Throwable cause);
    /**
     * Gets the type of corruption detected.
     *
     * <p>NOTE(review): no declared constructor takes a corruption type, so
     * how this value is derived is not visible here - confirm whether it can
     * be {@code null}.
     *
     * @return string describing the corruption type
     *         (e.g., "invalid header", "truncated")
     */
    public String getCorruptionType();
}
Exception for files that contain no data or are completely empty.
/**
 * Exception for empty files with zero bytes of content.
 *
 * <p>Subclass of {@code TikaException}.
 */
public class ZeroByteFileException extends TikaException {
    /**
     * Creates an exception for a zero-byte file.
     *
     * @param message description of the empty file condition
     */
    public ZeroByteFileException(String message);
    /**
     * Creates an exception with file path information.
     *
     * @param message description of the empty file condition
     * @param filePath path to the empty file
     */
    public ZeroByteFileException(String message, String filePath);
    /**
     * Gets the path of the empty file.
     *
     * @return string containing the file path, or {@code null} if unknown
     */
    public String getFilePath();
}
Exception thrown when output size exceeds configured limits during content extraction or conversion.
/**
 * Exception for exceeding configured write limits during processing.
 *
 * <p>Subclass of {@code TikaException}.
 *
 * <p>NOTE(review): the values reported by {@link #getLimit()} and
 * {@link #getActualSize()} when the exception was built with a constructor
 * that does not supply them are not specified here - confirm the defaults.
 */
public class WriteLimitReachedException extends TikaException {
    /**
     * Creates an exception for an exceeded write limit.
     *
     * @param message description of the limit violation
     */
    public WriteLimitReachedException(String message);
    /**
     * Creates an exception with limit information.
     *
     * @param message description of the limit violation
     * @param limit maximum allowed size in characters/bytes
     */
    public WriteLimitReachedException(String message, long limit);
    /**
     * Creates an exception with the limit and the actual size.
     *
     * @param message description of the limit violation
     * @param limit maximum allowed size
     * @param actualSize actual size that exceeded the limit
     */
    public WriteLimitReachedException(String message, long limit, long actualSize);
    /**
     * Gets the configured write limit.
     *
     * @return maximum allowed size in characters/bytes
     */
    public long getLimit();
    /**
     * Gets the actual size that exceeded the limit.
     *
     * @return actual size processed when the limit was exceeded
     */
    public long getActualSize();
}
Exception for errors occurring during extraction and processing of embedded documents.
/**
 * Exception for errors in embedded document extraction and processing.
 *
 * <p>Subclass of {@code TikaException}.
 */
public class EmbeddedDocumentExtractorException extends TikaException {
    /**
     * Creates an exception for an embedded document error.
     *
     * @param message description of the extraction issue
     */
    public EmbeddedDocumentExtractorException(String message);
    /**
     * Creates an exception with a message and an underlying cause.
     *
     * @param message description of the extraction issue
     * @param cause underlying exception from embedded document processing
     */
    public EmbeddedDocumentExtractorException(String message, Throwable cause);
    /**
     * Creates an exception with embedded document information.
     *
     * @param message description of the extraction issue
     * @param embeddedName name or identifier of the embedded document
     * @param cause underlying exception
     */
    public EmbeddedDocumentExtractorException(String message, String embeddedName, Throwable cause);
    /**
     * Gets the name of the embedded document that caused the error.
     *
     * <p>NOTE(review): only the three-argument constructor supplies a name;
     * the result for the other constructors is not specified here -
     * presumably {@code null}, confirm.
     *
     * @return string containing the embedded document name/identifier
     */
    public String getEmbeddedDocumentName();
}
Exception for configuration-related errors during Tika setup and initialization.
/**
 * Exception for configuration errors during Tika setup and initialization.
 *
 * <p>Subclass of {@code TikaException}.
 */
public class TikaConfigException extends TikaException {
    /**
     * Creates an exception for a configuration error.
     *
     * @param message description of the configuration issue
     */
    public TikaConfigException(String message);
    /**
     * Creates an exception with a message and an underlying cause.
     *
     * @param message description of the configuration issue
     * @param cause underlying exception from configuration processing
     */
    public TikaConfigException(String message, Throwable cause);
    /**
     * Creates an exception from an underlying configuration error.
     *
     * @param cause underlying exception from the configuration system
     */
    public TikaConfigException(Throwable cause);
    /**
     * Gets the configuration parameter that caused the error.
     *
     * <p>NOTE(review): no declared constructor takes a parameter name, so
     * how this value is set is not visible here.
     *
     * @return string containing the parameter name, or {@code null} if the
     *         error is not parameter-specific
     */
    public String getConfigParameter();
}
Exception for operations that are not supported by specific parser implementations.
/**
 * Exception for unsupported operations in parser implementations.
 *
 * <p>Subclass of {@code TikaException}, and therefore a checked exception.
 *
 * <p>NOTE: this class shares its simple name with
 * {@code java.lang.UnsupportedOperationException}, which is unchecked and
 * implicitly visible in every compilation unit. Code using this type must
 * import it explicitly or use its fully qualified name, otherwise the
 * {@code java.lang} class is what gets resolved.
 */
public class UnsupportedOperationException extends TikaException {
    /**
     * Creates an exception for an unsupported operation.
     *
     * @param message description of the unsupported operation
     */
    public UnsupportedOperationException(String message);
    /**
     * Creates an exception with operation and parser information.
     *
     * @param operation description of the attempted operation
     * @param parserClass class of the parser that does not support the operation
     */
    public UnsupportedOperationException(String operation, Class<?> parserClass);
    /**
     * Gets the operation that was not supported.
     *
     * @return string describing the attempted operation
     */
    public String getOperation();
    /**
     * Gets the parser class that does not support the operation.
     *
     * <p>NOTE(review): the result when the single-argument constructor was
     * used is not specified here - presumably {@code null}, confirm.
     *
     * @return class of the parser implementation
     */
    public Class<?> getParserClass();
}
// Basic parsing with exception handling
// Catch clauses are ordered from most specific to most general: every
// subclass of TikaException must appear before TikaException itself,
// otherwise the later clause is unreachable and the compiler rejects it.
try {
    AutoDetectParser parser = new AutoDetectParser();
    BodyContentHandler handler = new BodyContentHandler();
    Metadata metadata = new Metadata();
    parser.parse(inputStream, handler, metadata, new ParseContext());
    String content = handler.toString();
} catch (EncryptedDocumentException e) {
    System.err.println("Document is password protected: " + e.getMessage());
} catch (UnsupportedFormatException e) {
    System.err.println("Unsupported format: " + e.getMediaType());
} catch (CorruptedFileException e) {
    System.err.println("Corrupted file detected: " + e.getCorruptionType());
} catch (WriteLimitReachedException e) {
    System.err.println("Output size exceeded limit: " + e.getLimit() + " bytes");
} catch (TikaException e) {
    System.err.println("General Tika error: " + e.getMessage());
} catch (IOException e) {
    System.err.println("I/O error: " + e.getMessage());
} catch (SAXException e) {
    System.err.println("XML parsing error: " + e.getMessage());
}

/**
 * Example of mapping every exception type in the hierarchy to an error code
 * on a result object instead of letting it propagate.
 */
public class DocumentProcessor {
    /**
     * Parses {@code input} and reports the outcome in a {@code ProcessingResult}.
     *
     * <p>No exception escapes this method: each failure is translated into an
     * error code and message via {@code result.setError(...)}. The stream is
     * not closed here.
     *
     * @param input document content to parse
     * @param filename name stored in the metadata under
     *        {@code Metadata.RESOURCE_NAME_KEY}
     * @return result carrying either content and metadata, or an error
     */
    public ProcessingResult processDocument(InputStream input, String filename) {
        ProcessingResult result = new ProcessingResult();
        try {
            AutoDetectParser parser = new AutoDetectParser();
            // NOTE(review): the original comment calls this "1MB"; confirm
            // whether the handler counts characters or bytes.
            BodyContentHandler handler = new BodyContentHandler(1000000); // 1MB limit
            Metadata metadata = new Metadata();
            metadata.set(Metadata.RESOURCE_NAME_KEY, filename);
            parser.parse(input, handler, metadata, new ParseContext());
            result.setContent(handler.toString());
            result.setMetadata(metadata);
            result.setSuccess(true);
            if (handler.isWriteLimitReached()) {
                result.addWarning("Content truncated due to size limit");
            }
        } catch (EncryptedDocumentException e) {
            result.setError("ENCRYPTED", "Document requires password: " + e.getMessage());
        } catch (UnsupportedFormatException e) {
            // getMediaType() is documented to return null when unknown.
            MediaType type = e.getMediaType();
            result.setError("UNSUPPORTED_FORMAT",
                    "Format not supported: " + (type != null ? type.toString() : "unknown"));
        } catch (CorruptedFileException e) {
            result.setError("CORRUPTED", "File corruption detected: " + e.getCorruptionType());
        } catch (ZeroByteFileException e) {
            result.setError("EMPTY", "File is empty: " + e.getFilePath());
        } catch (AccessPermissionException e) {
            result.setError("PERMISSION_DENIED",
                    "Access denied for: " + e.getDeniedPermission());
        } catch (WriteLimitReachedException e) {
            result.setError("SIZE_LIMIT",
                    String.format("Size limit exceeded: %d > %d", e.getActualSize(), e.getLimit()));
        } catch (EmbeddedDocumentExtractorException e) {
            result.setError("EMBEDDED_ERROR",
                    "Embedded document error: " + e.getEmbeddedDocumentName());
        } catch (TikaException e) {
            result.setError("TIKA_ERROR", "Processing error: " + e.getMessage());
        } catch (Exception e) {
            // Deliberate catch-all at the API boundary so callers always get a result.
            result.setError("UNKNOWN", "Unexpected error: " + e.getMessage());
        }
        return result;
    }
}

/**
 * Example of recovering from specific Tika exceptions by retrying with a
 * different strategy.
 */
public class RobustDocumentParser {
    /**
     * Extracts text from {@code input}, falling back to alternative strategies
     * when the first attempt fails.
     *
     * <p>The document is read fully into memory first, because a failed parse
     * leaves the original stream partially or completely consumed; every
     * attempt then reads from its own fresh stream over the buffered bytes.
     * The caller remains responsible for closing {@code input}.
     *
     * @param input document content; read to the end but not closed
     * @return extracted text, possibly ending in {@code "\n[Content truncated]"}
     * @throws TikaException if no strategy succeeds; I/O and SAX failures are
     *         wrapped with the original exception as the cause
     */
    public String extractText(InputStream input) throws TikaException {
        try {
            byte[] document = readFully(input);
            // Try with full parser first
            try {
                return parseWithFullFeatures(open(document));
            } catch (EncryptedDocumentException e) {
                // Try common passwords
                for (String password : getCommonPasswords()) {
                    try {
                        return parseWithPassword(open(document), password);
                    } catch (Exception attempt) {
                        // Best effort: keep trying, but keep the failure
                        // attached to the original error for diagnosis.
                        e.addSuppressed(attempt);
                    }
                }
                throw e; // Re-throw if no password worked
            } catch (CorruptedFileException e) {
                // Try lenient parsing mode
                return parseWithLenientMode(open(document));
            } catch (UnsupportedFormatException e) {
                // Try text extraction fallback
                return tryTextExtractionFallback(open(document));
            }
        } catch (TikaException | RuntimeException e) {
            throw e;
        } catch (Exception e) {
            // Keep the declared contract (TikaException only) without losing the cause.
            throw new TikaException("Text extraction failed", e);
        }
    }

    /** Parses with a default {@code ParseContext}. */
    private String parseWithFullFeatures(InputStream input)
            throws IOException, SAXException, TikaException {
        return parse(input, new ParseContext());
    }

    /** Parses with a context intended to carry lenient settings. */
    private String parseWithLenientMode(InputStream input)
            throws IOException, SAXException, TikaException {
        // Implementation with lenient parsing settings
        ParseContext context = new ParseContext();
        // Add lenient configuration to context
        return parse(input, context);
    }

    /**
     * Runs the auto-detecting parser with the given context and returns the
     * handler's text; on a write-limit hit it returns what the handler
     * collected so far plus a truncation marker.
     */
    private String parse(InputStream input, ParseContext context)
            throws IOException, SAXException, TikaException {
        AutoDetectParser parser = new AutoDetectParser();
        BodyContentHandler handler = new BodyContentHandler();
        try {
            parser.parse(input, handler, new Metadata(), context);
        } catch (WriteLimitReachedException e) {
            // Return partial content with warning
            return handler.toString() + "\n[Content truncated]";
        }
        return handler.toString();
    }

    /** Opens a fresh, independent stream over the buffered document. */
    private static InputStream open(byte[] document) {
        return new java.io.ByteArrayInputStream(document);
    }

    /** Reads the stream to its end into a byte array (whole document in memory). */
    private static byte[] readFully(InputStream input) throws IOException {
        java.io.ByteArrayOutputStream buffer = new java.io.ByteArrayOutputStream();
        byte[] chunk = new byte[8192];
        int read;
        while ((read = input.read(chunk)) != -1) {
            buffer.write(chunk, 0, read);
        }
        return buffer.toByteArray();
    }
}
// Creating custom exceptions extending Tika hierarchy
/**
 * Unsupported-format error that additionally records which version of the
 * format was rejected.
 */
public class CustomFormatException extends UnsupportedFormatException {

    /** Version of the format that triggered this exception; set once at construction. */
    private final String formatVersion;

    /**
     * Builds the exception from a message and the offending format version.
     *
     * @param message description of the format problem, passed to the superclass
     * @param formatVersion version of the format that is not supported
     */
    public CustomFormatException(String message, String formatVersion) {
        super(message);
        this.formatVersion = formatVersion;
    }

    /**
     * @return the format version supplied to the constructor
     */
    public String getFormatVersion() {
        return this.formatVersion;
    }
}
// Usage in custom parser
/**
 * Example parser that raises exceptions from the Tika hierarchy: a
 * {@code CustomFormatException} for unsupported versions and a
 * {@code CorruptedFileException} for I/O failures that indicate corruption.
 */
public class CustomParser extends AbstractParser {

    /**
     * Checks the format version of {@code stream} before parsing.
     *
     * @throws IOException for I/O failures not classified as corruption
     * @throws SAXException declared for the parser contract
     * @throws TikaException if the version is unsupported or the file is corrupted
     */
    @Override
    public void parse(InputStream stream, ContentHandler handler,
                      Metadata metadata, ParseContext context)
            throws IOException, SAXException, TikaException {
        try {
            final String version = detectFormatVersion(stream);
            final boolean supported = isSupportedVersion(version);
            if (!supported) {
                final String reason = "Unsupported format version: " + version;
                throw new CustomFormatException(reason, version);
            }
            // Continue with parsing...
        } catch (IOException e) {
            if (!isCorruptionError(e)) {
                throw e; // Re-throw other I/O errors
            }
            // Keep the original I/O failure as the cause.
            throw new CorruptedFileException("Invalid file structure", e);
        }
    }
}
Install with Tessl CLI
npx tessl i tessl/maven-org-apache-tika--tika-core