0
# Content Stream Processing
1
2
Low-level content stream parsing and generation for advanced PDF content manipulation, custom rendering engines, and detailed content analysis.
3
4
## Base Stream Processing
5
6
Foundation classes for processing PDF content streams.
7
8
```java { .api }
9
// Constructor and methods in org.apache.pdfbox.contentstream.PDFStreamEngine
10
public PDFStreamEngine();
11
public PDFStreamEngine(ResourceCache resourceCache);
12
13
// Main processing methods
14
public void processPage(PDPage page) throws IOException;
15
public void processStream(PDContentStream contentStream, PDPage page, PDResources resources) throws IOException;
16
17
// Operator handling
18
protected void processOperator(Operator operator, List<COSBase> operands) throws IOException;
19
protected void unsupportedOperator(Operator operator, List<COSBase> operands) throws IOException;
20
21
// State management
22
public PDGraphicsState getGraphicsState();
23
public Matrix getTextMatrix();
24
public Matrix getTextLineMatrix();
25
```
26
27
## Graphics Stream Processing
28
29
Enhanced stream processing for graphics operations and rendering.
30
31
```java { .api }
32
// Constructor in org.apache.pdfbox.contentstream.PDFGraphicsStreamEngine
33
public PDFGraphicsStreamEngine(PDPage page);
34
35
// Abstract graphics methods (must be implemented)
36
protected abstract void appendRectangle(Point2D p0, Point2D p1, Point2D p2, Point2D p3) throws IOException;
37
protected abstract void drawImage(PDImage pdImage) throws IOException;
38
protected abstract void clip(int windingRule) throws IOException;
39
protected abstract void moveTo(float x, float y) throws IOException;
40
protected abstract void lineTo(float x, float y) throws IOException;
41
protected abstract void curveTo(float x1, float y1, float x2, float y2, float x3, float y3) throws IOException;
42
protected abstract void closePath() throws IOException;
43
protected abstract void endPath() throws IOException;
44
protected abstract void strokePath() throws IOException;
45
protected abstract void fillPath(int windingRule) throws IOException;
46
protected abstract void fillAndStrokePath(int windingRule) throws IOException;
47
protected abstract void shadingFill(COSName shadingName) throws IOException;
48
49
// Graphics state access
50
public PDColor getStrokingColor();
51
public PDColor getNonStrokingColor();
52
public float getLineWidth();
53
public int getLineCap();
54
public int getLineJoin();
55
public float getMiterLimit();
56
public float[] getLineDashPattern();
57
public float getLineDashPhase();
58
```
59
60
## Operator Processing
61
62
Handle specific PDF operators and their operands.
63
64
```java { .api }
65
// Methods for specific operator categories in PDFStreamEngine
66
protected void processTextPosition(TextPosition text);
67
protected void showText(byte[] string) throws IOException;
68
protected void showTextAdjusted(List<Object> array) throws IOException;
69
70
// Graphics state operators
71
protected void saveGraphicsState() throws IOException;
72
protected void restoreGraphicsState() throws IOException;
73
protected void concatenate(Matrix matrix) throws IOException;
74
75
// Path construction operators
76
protected void moveToOperator(List<COSBase> operands) throws IOException;
77
protected void lineToOperator(List<COSBase> operands) throws IOException;
78
protected void curveToOperator(List<COSBase> operands) throws IOException;
79
protected void closePathOperator(List<COSBase> operands) throws IOException;
80
protected void rectangleOperator(List<COSBase> operands) throws IOException;
81
82
// Path painting operators
83
protected void strokeOperator(List<COSBase> operands) throws IOException;
84
protected void fillOperator(List<COSBase> operands) throws IOException;
85
protected void fillAndStrokeOperator(List<COSBase> operands) throws IOException;
86
protected void clipOperator(List<COSBase> operands) throws IOException;
87
```
88
89
## Content Stream Creation
90
91
Generate PDF content streams programmatically.
92
93
```java { .api }
94
// Methods in org.apache.pdfbox.pdmodel.PDPageContentStream for content generation
95
public void beginText() throws IOException;
96
public void endText() throws IOException;
97
public void setFont(PDFont font, float fontSize) throws IOException;
98
public void setFontAndSize(PDFont font, float fontSize) throws IOException;
99
public void newLineAtOffset(float tx, float ty) throws IOException;
100
public void setTextMatrix(Matrix matrix) throws IOException;
101
public void showText(String text) throws IOException;
102
public void showTextWithPositioning(Object[] textWithPositioning) throws IOException;
103
104
// Path operations
105
public void moveTo(float x, float y) throws IOException;
106
public void lineTo(float x, float y) throws IOException;
107
public void curveTo(float x1, float y1, float x2, float y2, float x3, float y3) throws IOException;
108
public void addRect(float x, float y, float width, float height) throws IOException;
109
public void closePath() throws IOException;
110
111
// Path painting
112
public void stroke() throws IOException;
113
public void fill() throws IOException;
114
public void fillAndStroke() throws IOException;
115
public void closeAndStroke() throws IOException;
116
public void closeAndFillAndStroke() throws IOException;
117
public void clip() throws IOException;
118
119
// Graphics state
120
public void saveGraphicsState() throws IOException;
121
public void restoreGraphicsState() throws IOException;
122
public void transform(Matrix matrix) throws IOException;
123
public void setStrokingColor(Color color) throws IOException;
124
public void setStrokingColor(float c) throws IOException;
125
public void setStrokingColor(float c, float m, float y, float k) throws IOException;
126
public void setNonStrokingColor(Color color) throws IOException;
127
public void setNonStrokingColor(float c) throws IOException;
128
public void setNonStrokingColor(float c, float m, float y, float k) throws IOException;
129
public void setLineWidth(float lineWidth) throws IOException;
130
public void setLineCap(int lineCap) throws IOException;
131
public void setLineJoin(int lineJoin) throws IOException;
132
public void setMiterLimit(float miterLimit) throws IOException;
133
public void setLineDashPattern(float[] pattern, float phase) throws IOException;
134
```
135
136
## Operator Objects
137
138
Work with PDF operators and their operands.
139
140
```java { .api }
141
// Methods in org.apache.pdfbox.contentstream.operator.Operator
142
public String getName();
143
public List<COSBase> getOperands();
144
public void setOperands(List<COSBase> operands);
145
146
// Static factory methods
147
public static Operator getOperator(String name);
148
```
149
150
## Resource Management
151
152
Access and manage content stream resources.
153
154
```java { .api }
155
// Methods in org.apache.pdfbox.pdmodel.PDResources
156
public PDFont getFont(COSName name) throws IOException;
157
public PDXObject getXObject(COSName name) throws IOException;
158
public PDExtendedGraphicsState getExtGState(COSName name);
159
public PDColorSpace getColorSpace(COSName name) throws IOException;
160
public PDPattern getPattern(COSName name) throws IOException;
161
public PDShading getShading(COSName name) throws IOException;
162
163
// Resource modification
164
public void put(COSName name, PDFont font);
165
public void put(COSName name, PDXObject xobject);
166
public void put(COSName name, PDExtendedGraphicsState extGState);
167
public void put(COSName name, PDColorSpace colorSpace);
168
```
169
170
## Usage Examples
171
172
### Custom Content Stream Processor
173
174
```java
175
public class CustomContentProcessor extends PDFStreamEngine {
176
private List<String> textContent = new ArrayList<>();
177
private List<Rectangle2D> imagePositions = new ArrayList<>();
178
179
public CustomContentProcessor() throws IOException {
180
super();
181
}
182
183
@Override
184
protected void processTextPosition(TextPosition text) {
185
textContent.add(text.getUnicode());
186
System.out.println("Text: " + text.getUnicode() +
187
" at (" + text.getX() + ", " + text.getY() + ")");
188
}
189
190
@Override
191
protected void processOperator(Operator operator, List<COSBase> operands) throws IOException {
192
String operatorName = operator.getName();
193
194
if ("Do".equals(operatorName)) {
195
// XObject (image) placement
196
COSName name = (COSName) operands.get(0);
197
System.out.println("Drawing XObject: " + name.getName());
198
}
199
200
super.processOperator(operator, operands);
201
}
202
203
public List<String> getTextContent() {
204
return textContent;
205
}
206
}
207
208
// Usage
209
PDDocument document = Loader.loadPDF(new File("document.pdf"));
210
CustomContentProcessor processor = new CustomContentProcessor();
211
212
for (int i = 0; i < document.getNumberOfPages(); i++) {
213
PDPage page = document.getPage(i);
214
processor.processPage(page);
215
}
216
217
List<String> extractedText = processor.getTextContent();
218
document.close();
219
```
220
221
### Custom Graphics Renderer
222
223
```java
224
public class SimpleGraphicsRenderer extends PDFGraphicsStreamEngine {
225
private Graphics2D graphics;
226
private AffineTransform baseTransform;
227
228
public SimpleGraphicsRenderer(PDPage page, Graphics2D graphics) {
229
super(page);
230
this.graphics = graphics;
231
this.baseTransform = graphics.getTransform();
232
}
233
234
@Override
235
protected void appendRectangle(Point2D p0, Point2D p1, Point2D p2, Point2D p3) throws IOException {
236
Path2D path = new Path2D.Float();
237
path.moveTo(p0.getX(), p0.getY());
238
path.lineTo(p1.getX(), p1.getY());
239
path.lineTo(p2.getX(), p2.getY());
240
path.lineTo(p3.getX(), p3.getY());
241
path.closePath();
242
243
graphics.draw(path);
244
}
245
246
@Override
247
protected void drawImage(PDImage pdImage) throws IOException {
248
BufferedImage image = pdImage.getImage();
249
Matrix matrix = getGraphicsState().getCurrentTransformationMatrix();
250
251
// Apply transformation and draw image
252
AffineTransform transform = matrix.createAffineTransform();
253
graphics.drawImage(image, transform, null);
254
}
255
256
@Override
257
protected void clip(int windingRule) throws IOException {
258
// Set clipping region
259
graphics.setClip(getCurrentPath());
260
}
261
262
@Override
263
protected void moveTo(float x, float y) throws IOException {
264
currentPath.moveTo(x, y);
265
}
266
267
@Override
268
protected void lineTo(float x, float y) throws IOException {
269
currentPath.lineTo(x, y);
270
}
271
272
@Override
273
protected void strokePath() throws IOException {
274
graphics.setStroke(createStroke());
275
graphics.setColor(getStrokingColor().toColor());
276
graphics.draw(currentPath);
277
}
278
279
@Override
280
protected void fillPath(int windingRule) throws IOException {
281
graphics.setColor(getNonStrokingColor().toColor());
282
graphics.fill(currentPath);
283
}
284
285
// ... implement other abstract methods
286
}
287
```
288
289
### Content Stream Generation
290
291
```java
292
// try-with-resources closes the document and the content stream even when
// one of the drawing calls throws.
try (PDDocument document = new PDDocument()) {
    PDPage page = new PDPage(PDRectangle.A4);
    document.addPage(page);

    try (PDPageContentStream contentStream = new PDPageContentStream(document, page)) {
        // Text operations
        contentStream.beginText();
        // PDFBox 3.x (the version that provides Loader.loadPDF) has no
        // PDType1Font.HELVETICA constant; standard fonts are created from
        // org.apache.pdfbox.pdmodel.font.Standard14Fonts.FontName instead.
        contentStream.setFont(new PDType1Font(Standard14Fonts.FontName.HELVETICA), 12);
        contentStream.newLineAtOffset(100, 700);
        contentStream.showText("Hello World!");
        contentStream.endText();

        // Graphics operations
        contentStream.saveGraphicsState();
        contentStream.setStrokingColor(Color.BLUE);
        contentStream.setLineWidth(2);

        // Draw rectangle
        contentStream.addRect(100, 600, 200, 100);
        contentStream.stroke();

        // Draw circle (approximated with four Bezier curves, one per quadrant)
        float centerX = 200, centerY = 500, radius = 50;
        float kappa = 0.552284749831f; // 4/3 * (sqrt(2) - 1)
        float offset = radius * kappa;

        contentStream.moveTo(centerX, centerY + radius);
        contentStream.curveTo(centerX + offset, centerY + radius, centerX + radius, centerY + offset, centerX + radius, centerY);
        contentStream.curveTo(centerX + radius, centerY - offset, centerX + offset, centerY - radius, centerX, centerY - radius);
        contentStream.curveTo(centerX - offset, centerY - radius, centerX - radius, centerY - offset, centerX - radius, centerY);
        contentStream.curveTo(centerX - radius, centerY + offset, centerX - offset, centerY + radius, centerX, centerY + radius);
        // fill() paints with the non-stroking colour, which has not been changed above
        contentStream.fill();

        contentStream.restoreGraphicsState();
    }

    // The content stream must be closed before the document is saved
    document.save("custom-content.pdf");
}
331
```
332
333
### Advanced Operator Processing
334
335
```java
336
public class OperatorAnalyzer extends PDFStreamEngine {
337
private Map<String, Integer> operatorCounts = new HashMap<>();
338
339
@Override
340
protected void processOperator(Operator operator, List<COSBase> operands) throws IOException {
341
String name = operator.getName();
342
operatorCounts.put(name, operatorCounts.getOrDefault(name, 0) + 1);
343
344
// Log specific operators
345
switch (name) {
346
case "Tj": // Show text
347
System.out.println("Text operator: " + operands);
348
break;
349
case "cm": // Concatenate matrix
350
System.out.println("Transform matrix: " + operands);
351
break;
352
case "Do": // Invoke XObject
353
System.out.println("XObject invocation: " + operands);
354
break;
355
}
356
357
super.processOperator(operator, operands);
358
}
359
360
public Map<String, Integer> getOperatorCounts() {
361
return operatorCounts;
362
}
363
}
364
365
// Usage
366
PDDocument document = Loader.loadPDF(new File("document.pdf"));
367
OperatorAnalyzer analyzer = new OperatorAnalyzer();
368
369
PDPage page = document.getPage(0);
370
analyzer.processPage(page);
371
372
Map<String, Integer> counts = analyzer.getOperatorCounts();
373
counts.forEach((op, count) ->
374
System.out.println("Operator " + op + ": " + count + " times"));
375
376
document.close();
377
```