0
# Document Rendering
1
2
Document rendering framework for converting documents into visual representations such as images. This framework provides flexible rendering capabilities for different document types including PDFs, with support for page-based rendering, custom render requests, and advanced result management.
3
4
## Capabilities
5
6
### Renderer Interface
7
8
Core interface for document rendering operations supporting various document formats and rendering requests.
9
10
```java { .api }
11
public interface Renderer extends Serializable {
12
/**
13
* Returns the set of media types supported by this renderer
14
* @param context parse context for renderer configuration
15
* @return immutable set of supported media types
16
*/
17
Set<MediaType> getSupportedTypes(ParseContext context);
18
19
/**
20
* Renders document content based on provided render requests
21
* @param is input stream containing document data
22
* @param metadata document metadata
23
* @param parseContext parsing context
24
* @param requests variable number of render requests
25
* @return collection of render results
26
* @throws IOException if I/O error occurs during rendering
27
* @throws TikaException if rendering fails
28
*/
29
RenderResults render(InputStream is, Metadata metadata, ParseContext parseContext,
30
RenderRequest... requests) throws IOException, TikaException;
31
}
32
```
33
34
### Composite Renderer
35
36
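Default renderer implementation that delegates to format-specific renderers based on the document's media type. The media type is not sniffed from the stream; the caller supplies it in the metadata, as shown in the usage examples below.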
37
38
```java { .api }
39
public class CompositeRenderer implements Renderer, Initializable {
40
/**
41
* Creates composite renderer with service-loaded renderers
42
* @param serviceLoader service loader for renderer discovery
43
*/
44
public CompositeRenderer(ServiceLoader serviceLoader);
45
46
/**
47
* Creates composite renderer with provided renderer list
48
* @param renderers list of renderers to compose
49
*/
50
public CompositeRenderer(List<Renderer> renderers);
51
52
/**
53
* Gets the specific renderer for a media type
54
* @param mt media type to find renderer for
55
* @return renderer instance or null if not found
56
*/
57
public Renderer getLeafRenderer(MediaType mt);
58
}
59
```
60
61
### Render Requests
62
63
Base interface and implementations for different rendering request types.
64
65
```java { .api }
66
/**
67
* Base interface for rendering requests - extensible for different document types
68
*/
69
public interface RenderRequest {
70
// Marker interface - implementations define specific request parameters
71
}
72
73
/**
74
* Request for rendering specific page ranges in page-based documents
75
*/
76
public class PageRangeRequest implements RenderRequest {
77
/** Constant for rendering all pages */
78
public static final PageRangeRequest RENDER_ALL = new PageRangeRequest(1, -1);
79
80
/**
81
* Creates page range request
82
* @param from starting page number (1-based)
83
* @param to ending page number (1-based, inclusive, -1 for all)
84
*/
85
public PageRangeRequest(int from, int to);
86
87
/** @return starting page number */
88
public int getFrom();
89
90
/** @return ending page number */
91
public int getTo();
92
}
93
```
94
95
### Render Results
96
97
Classes for managing rendering results and output data.
98
99
```java { .api }
100
/**
101
* Individual render result with status, content, and metadata
102
*/
103
public class RenderResult implements Closeable {
104
/**
105
* Status enumeration for render results
106
*/
107
public enum STATUS {
108
SUCCESS, // Rendering completed successfully
109
EXCEPTION, // Rendering failed with exception
110
TIMEOUT // Rendering timed out
111
}
112
113
/**
114
* Creates render result
115
* @param status rendering status
116
* @param id unique identifier for this result
117
* @param result rendered content (Path or other object)
118
* @param metadata associated metadata
119
*/
120
public RenderResult(STATUS status, int id, Object result, Metadata metadata);
121
122
/** @return input stream for rendered content */
123
public InputStream getInputStream() throws IOException;
124
125
/** @return associated metadata */
126
public Metadata getMetadata();
127
128
/** @return rendering status */
129
public STATUS getStatus();
130
131
/** @return unique result identifier */
132
public int getId();
133
}
134
135
/**
136
* Collection of render results with resource management
137
*/
138
public class RenderResults implements Closeable {
139
/**
140
* Creates render results collection
141
* @param tmp temporary resources manager
142
*/
143
public RenderResults(TemporaryResources tmp);
144
145
/**
146
* Adds render result to collection
147
* @param result render result to add
148
*/
149
public void add(RenderResult result);
150
151
/** @return list of all render results */
152
public List<RenderResult> getResults();
153
}
154
155
/**
156
* Page-organized render results for page-based documents
157
*/
158
public class PageBasedRenderResults extends RenderResults {
159
/**
160
* Creates page-based render results
161
* @param tmp temporary resources manager
162
*/
163
public PageBasedRenderResults(TemporaryResources tmp);
164
165
/**
166
* Gets render results for specific page
167
* @param pageNumber page number to retrieve
168
* @return list of render results for the page
169
*/
170
public List<RenderResult> getPage(int pageNumber);
171
}
172
```
173
174
## Usage Examples
175
176
**Basic Document Rendering:**
177
178
```java
179
import org.apache.tika.renderer.*;
180
import org.apache.tika.config.TikaConfig;
181
import org.apache.tika.metadata.Metadata;
182
import org.apache.tika.metadata.TikaCoreProperties;
183
import org.apache.tika.parser.ParseContext;
184
import java.io.FileInputStream;
185
import java.io.InputStream;
186
187
// Setup renderer
188
TikaConfig config = TikaConfig.getDefaultConfig();
189
CompositeRenderer renderer = new CompositeRenderer(config.getServiceLoader());
190
191
// Setup document metadata
192
Metadata metadata = new Metadata();
193
metadata.set(TikaCoreProperties.TYPE, "application/pdf");
194
195
// Render all pages
196
try (InputStream stream = new FileInputStream("document.pdf");
197
RenderResults results = renderer.render(stream, metadata, new ParseContext(),
198
PageRangeRequest.RENDER_ALL)) {
199
200
// Process results
201
for (RenderResult result : results.getResults()) {
202
if (result.getStatus() == RenderResult.STATUS.SUCCESS) {
203
try (InputStream renderedContent = result.getInputStream()) {
204
// Process rendered content (e.g., save as image)
205
// renderedContent contains the visual representation
206
}
207
}
208
}
209
}
210
```
211
212
**Page-Specific Rendering:**
213
214
```java
215
// Render specific page range (pages 2-5)
216
PageRangeRequest pageRequest = new PageRangeRequest(2, 5);
217
218
try (InputStream stream = new FileInputStream("document.pdf");
219
RenderResults results = renderer.render(stream, metadata, new ParseContext(), pageRequest)) {
220
221
// Use page-based results for organized access
222
if (results instanceof PageBasedRenderResults) {
223
PageBasedRenderResults pageResults = (PageBasedRenderResults) results;
224
225
// Get results for specific page
226
List<RenderResult> page3Results = pageResults.getPage(3);
227
for (RenderResult result : page3Results) {
228
System.out.println("Page 3 render result ID: " + result.getId());
229
}
230
}
231
}
232
```
233
234
**Custom Renderer Implementation:**
235
236
```java
237
public class CustomImageRenderer implements Renderer {
238
@Override
239
public Set<MediaType> getSupportedTypes(ParseContext context) {
240
return Set.of(MediaType.image("jpeg"), MediaType.image("png"));
241
}
242
243
@Override
244
public RenderResults render(InputStream is, Metadata metadata, ParseContext parseContext,
245
RenderRequest... requests) throws IOException, TikaException {
246
RenderResults results = new RenderResults(new TemporaryResources());
247
248
// Custom rendering logic for images
249
// Process input stream and create rendered output
250
251
return results;
252
}
253
}
254
```
255
256
The rendering framework is designed to be extensible, allowing custom implementations for specific document types and use cases. It integrates with Tika's service loader mechanism for automatic renderer discovery and provides comprehensive resource management for temporary files and streams.