0
# I/O Utilities
1
2
Utilities for robust document processing: enhanced input streams, temporary resource management, general stream I/O helpers, filename manipulation, and endian conversion.
3
4
## Capabilities
5
6
### Enhanced Input Streams
7
8
#### TikaInputStream
9
10
Enhanced input stream wrapper providing file backing, mark/reset capabilities, and metadata extraction support.
11
12
```java { .api }
13
/**
14
* Enhanced input stream with file backing and metadata extraction capabilities
15
*/
16
public class TikaInputStream extends ProxyInputStream {
17
/**
18
* Wraps existing InputStream with TikaInputStream capabilities
19
* @param stream InputStream to wrap
20
* @return TikaInputStream wrapping the input stream
21
*/
22
public static TikaInputStream get(InputStream stream);
23
24
/**
25
* Creates TikaInputStream from File
26
* @param file File to create stream from
27
* @return TikaInputStream backed by the file
28
* @throws FileNotFoundException if file doesn't exist
29
*/
30
public static TikaInputStream get(File file) throws FileNotFoundException;
31
32
/**
33
* Creates TikaInputStream from Path
34
* @param path Path to create stream from
35
* @return TikaInputStream backed by the path
36
* @throws IOException if path cannot be accessed
37
*/
38
public static TikaInputStream get(Path path) throws IOException;
39
40
/**
41
* Creates TikaInputStream from URL
42
* @param url URL to create stream from
43
* @return TikaInputStream backed by URL content
44
* @throws IOException if URL cannot be accessed
45
*/
46
public static TikaInputStream get(URL url) throws IOException;
47
48
/**
49
* Creates TikaInputStream from byte array
50
* @param data Byte array containing data
51
* @return TikaInputStream backed by byte array
52
*/
53
public static TikaInputStream get(byte[] data);
54
55
/**
56
* Checks if stream is backed by a file
57
* @return true if stream has file backing
58
*/
59
public boolean hasFile();
60
61
/**
62
* Gets the backing file if available
63
* @return File backing this stream, or null if no file backing
64
*/
65
public File getFile();
66
67
/**
68
* Gets the file path if available
69
* @return Path backing this stream, or null if no path backing
70
*/
71
public Path getPath();
72
73
/**
74
* Gets or creates a temporary file containing stream data
75
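* @param threshold Threshold applied to this lookup (presumably a size in bytes; units and exact semantics are not stated here, so confirm against the implementation)
* @return File containing all stream data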
76
* @throws IOException if temporary file cannot be created
77
*/
78
public File getFileThreshold(int threshold) throws IOException;
79
80
/**
81
* Gets the length of the stream if known
82
* @return Stream length in bytes, or -1 if unknown
83
*/
84
public long getLength();
85
86
/**
87
* Gets the current position in the stream
88
* @return Current position in bytes from start
89
*/
90
public long getPosition();
91
92
/**
93
* Sets mark supported flag
94
* @param markSupported Whether mark/reset should be supported
95
*/
96
public void setMarkSupported(boolean markSupported);
97
98
/**
99
* Checks if mark/reset is supported
100
* @return true if mark/reset operations are supported
101
*/
102
@Override
103
public boolean markSupported();
104
105
/**
106
* Marks current position in stream
107
* @param readLimit Maximum bytes that can be read before mark becomes invalid
108
*/
109
@Override
110
public void mark(int readLimit);
111
112
/**
113
* Resets stream to marked position
114
* @throws IOException if reset is not supported or mark is invalid
115
*/
116
@Override
117
public void reset() throws IOException;
118
119
/**
120
* Reads specified number of bytes from current position
121
* @param buffer Buffer to read into
122
* @param offset Offset in buffer to start writing
123
* @param length Maximum number of bytes to read
124
* @return Number of bytes actually read, or -1 if end of stream
125
* @throws IOException if read operation fails
126
*/
127
@Override
128
public int read(byte[] buffer, int offset, int length) throws IOException;
129
130
/**
131
* Skips specified number of bytes
132
* @param n Number of bytes to skip
133
* @return Number of bytes actually skipped
134
* @throws IOException if skip operation fails
135
*/
136
@Override
137
public long skip(long n) throws IOException;
138
139
/**
140
* Closes stream and releases resources
141
* @throws IOException if close operation fails
142
*/
143
@Override
144
public void close() throws IOException;
145
}
146
```
147
148
#### BoundedInputStream
149
150
Input stream wrapper that limits the number of bytes that can be read from the underlying stream.
151
152
```java { .api }
153
/**
154
* Input stream that limits reading to specified number of bytes
155
*/
156
public class BoundedInputStream extends ProxyInputStream {
157
/**
158
* Creates bounded input stream with maximum read limit
159
* @param stream Underlying input stream
160
* @param maxBytes Maximum number of bytes to read
161
*/
162
public BoundedInputStream(InputStream stream, long maxBytes);
163
164
/**
165
* Gets the maximum number of bytes that can be read
166
* @return Maximum byte limit for this stream
167
*/
168
public long getMaxBytes();
169
170
/**
171
* Gets the number of bytes read so far
172
* @return Number of bytes read from start
173
*/
174
public long getBytesRead();
175
176
/**
177
* Gets the number of remaining bytes that can be read
178
* @return Remaining bytes before limit is reached
179
*/
180
public long getRemainingBytes();
181
182
/**
183
* Checks if byte limit has been reached
184
* @return true if no more bytes can be read due to limit
185
*/
186
public boolean isLimitReached();
187
188
/**
189
* Reads single byte from stream
190
* @return Byte value (0-255) or -1 if end of stream or limit reached
191
* @throws IOException if read operation fails
192
*/
193
@Override
194
public int read() throws IOException;
195
196
/**
197
* Reads bytes into buffer
198
* @param buffer Buffer to read into
199
* @param offset Offset in buffer to start writing
200
* @param length Maximum number of bytes to read
201
* @return Number of bytes read, or -1 if end of stream or limit reached
202
* @throws IOException if read operation fails
203
*/
204
@Override
205
public int read(byte[] buffer, int offset, int length) throws IOException;
206
207
/**
208
* Skips bytes in stream up to remaining limit
209
* @param n Number of bytes to skip
210
* @return Number of bytes actually skipped
211
* @throws IOException if skip operation fails
212
*/
213
@Override
214
public long skip(long n) throws IOException;
215
}
216
```
217
218
### Temporary Resource Management
219
220
#### TemporaryResources
221
222
Manager for temporary files and resources with automatic cleanup capabilities.
223
224
```java { .api }
225
/**
226
* Manager for temporary files and resources with automatic cleanup
227
*/
228
public class TemporaryResources implements Closeable {
229
/**
230
* Creates new temporary resources manager
231
*/
232
public TemporaryResources();
233
234
/**
235
* Creates temporary file with optional prefix and suffix
236
* @param prefix Prefix for temporary file name
237
* @param suffix Suffix for temporary file name
238
* @return File object for created temporary file
239
* @throws IOException if temporary file cannot be created
240
*/
241
public File createTemporaryFile(String prefix, String suffix) throws IOException;
242
243
/**
244
* Creates temporary file with default naming
245
* @return File object for created temporary file
246
* @throws IOException if temporary file cannot be created
247
*/
248
public File createTemporaryFile() throws IOException;
249
250
/**
251
* Creates temporary directory
252
* @param prefix Prefix for temporary directory name
253
* @return File object for created temporary directory
254
* @throws IOException if temporary directory cannot be created
255
*/
256
public File createTemporaryDirectory(String prefix) throws IOException;
257
258
/**
259
* Registers existing file for cleanup when resources are closed
260
* @param file File to register for automatic cleanup
261
*/
262
public void addToCleanupQueue(File file);
263
264
/**
265
* Creates TikaInputStream with temporary file backing
266
* @param stream Input stream to wrap
267
* @return TikaInputStream with temporary file backing
268
* @throws IOException if temporary file cannot be created
269
*/
270
public TikaInputStream createTikaInputStream(InputStream stream) throws IOException;
271
272
/**
273
* Copies input stream to temporary file
274
* @param stream Input stream to copy
275
* @param prefix Prefix for temporary file name
276
* @param suffix Suffix for temporary file name
277
* @return File containing copied stream data
278
* @throws IOException if copy operation fails
279
*/
280
public File copyToTemporaryFile(InputStream stream, String prefix, String suffix) throws IOException;
281
282
/**
283
* Gets list of all temporary files created
284
* @return List of File objects representing temporary files
285
*/
286
public List<File> getTemporaryFiles();
287
288
/**
289
* Gets total size of all temporary files
290
* @return Total size in bytes of all temporary files
291
*/
292
public long getTotalSize();
293
294
/**
295
* Cleans up all temporary resources
296
* @throws IOException if cleanup fails
297
*/
298
@Override
299
public void close() throws IOException;
300
}
301
```
302
303
### I/O Utility Methods
304
305
#### IOUtils
306
307
Collection of static utility methods for common I/O operations and stream handling.
308
309
```java { .api }
310
/**
311
* Static utility methods for I/O operations and stream handling
312
*/
313
public class IOUtils {
314
/**
315
* Copies all bytes from input stream to output stream
316
* @param input Source input stream
317
* @param output Destination output stream
318
* @return Number of bytes copied
319
* @throws IOException if copy operation fails
320
*/
321
public static long copy(InputStream input, OutputStream output) throws IOException;
322
323
/**
324
* Copies input stream to output stream with buffer size control
325
* @param input Source input stream
326
* @param output Destination output stream
327
* @param bufferSize Size of copy buffer in bytes
328
* @return Number of bytes copied
329
* @throws IOException if copy operation fails
330
*/
331
public static long copy(InputStream input, OutputStream output, int bufferSize) throws IOException;
332
333
/**
334
* Copies input stream to writer using specified encoding
335
* @param input Source input stream
336
* @param writer Destination writer
337
* @param encoding Character encoding to use
338
* @throws IOException if copy operation fails
339
*/
340
public static void copy(InputStream input, Writer writer, String encoding) throws IOException;
341
342
/**
343
* Reads all bytes from input stream into byte array
344
* @param input Input stream to read
345
* @return Byte array containing all stream data
346
* @throws IOException if read operation fails
347
*/
348
public static byte[] toByteArray(InputStream input) throws IOException;
349
350
/**
351
* Reads all characters from reader into string
352
* @param reader Reader to read from
353
* @return String containing all reader data
354
* @throws IOException if read operation fails
355
*/
356
public static String toString(Reader reader) throws IOException;
357
358
/**
359
* Reads input stream into string using specified encoding
360
* @param input Input stream to read
361
* @param encoding Character encoding to use
362
* @return String containing stream data
363
* @throws IOException if read operation fails
364
*/
365
public static String toString(InputStream input, String encoding) throws IOException;
366
367
/**
368
* Quietly closes closeable object without throwing exceptions
369
* @param closeable Object to close (may be null)
370
*/
371
public static void closeQuietly(Closeable closeable);
372
373
/**
374
* Quietly closes multiple closeable objects
375
* @param closeables Array of objects to close
376
*/
377
public static void closeQuietly(Closeable... closeables);
378
379
/**
380
* Skips exactly the specified number of bytes from input stream
381
* @param input Input stream to skip from
382
* @param toSkip Number of bytes to skip
383
* @throws IOException if skip operation fails or reaches end of stream
384
*/
385
public static void skipFully(InputStream input, long toSkip) throws IOException;
386
387
/**
388
* Reads exactly the specified number of bytes from input stream
389
* @param input Input stream to read from
390
* @param buffer Buffer to read into
391
* @param offset Offset in buffer to start writing
392
* @param length Number of bytes to read
393
* @throws IOException if read fails or reaches end of stream prematurely
394
*/
395
public static void readFully(InputStream input, byte[] buffer, int offset, int length) throws IOException;
396
}
397
```
398
399
### Filename Utilities
400
401
#### FilenameUtils
402
403
Utilities for filename manipulation, extension extraction, and path handling.
404
405
```java { .api }
406
/**
407
* Utilities for filename and path manipulation
408
*/
409
public class FilenameUtils {
410
/**
411
* Extracts file extension from filename
412
* @param filename Filename to extract extension from
413
* @return File extension without dot, or empty string if no extension
414
*/
415
public static String getExtension(String filename);
416
417
/**
418
* Gets basename of file without extension
419
* @param filename Filename to get basename from
420
* @return Filename without extension
421
*/
422
public static String getBaseName(String filename);
423
424
/**
425
* Gets filename without path components
426
* @param path Full path string
427
* @return Filename component only
428
*/
429
public static String getName(String path);
430
431
/**
432
* Gets parent directory path
433
* @param path Full path string
434
* @return Parent directory path, or null if no parent
435
*/
436
public static String getParent(String path);
437
438
/**
439
* Normalizes path separators to system format
440
* @param path Path to normalize
441
* @return Path with normalized separators
442
*/
443
public static String normalize(String path);
444
445
/**
446
* Removes extension from filename
447
* @param filename Filename to remove extension from
448
* @return Filename without extension
449
*/
450
public static String removeExtension(String filename);
451
452
/**
453
* Checks if path is absolute
454
* @param path Path to check
455
* @return true if path is absolute
456
*/
457
public static boolean isAbsolute(String path);
458
459
/**
460
* Concatenates paths with proper separators
461
* @param basePath Base path
462
* @param relativePath Relative path to append
463
* @return Combined path string
464
*/
465
public static String concat(String basePath, String relativePath);
466
467
/**
468
* Splits filename into name and extension parts
469
* @param filename Filename to split
470
* @return Array containing [basename, extension]
471
*/
472
public static String[] splitExtension(String filename);
473
}
474
```
475
476
### Endian Conversion Utilities
477
478
#### EndianUtils
479
480
Utilities for converting between little-endian and big-endian byte representations.
481
482
```java { .api }
483
/**
484
* Utilities for endian conversion and byte order manipulation
485
*/
486
public class EndianUtils {
487
/**
488
* Reads little-endian short from byte array
489
* @param data Byte array containing data
490
* @param offset Offset to start reading from
491
* @return Short value in host byte order
492
*/
493
public static short readSwappedShort(byte[] data, int offset);
494
495
/**
496
* Reads little-endian int from byte array
497
* @param data Byte array containing data
498
* @param offset Offset to start reading from
499
* @return Int value in host byte order
500
*/
501
public static int readSwappedInteger(byte[] data, int offset);
502
503
/**
504
* Reads little-endian long from byte array
505
* @param data Byte array containing data
506
* @param offset Offset to start reading from
507
* @return Long value in host byte order
508
*/
509
public static long readSwappedLong(byte[] data, int offset);
510
511
/**
512
* Reads little-endian float from byte array
513
* @param data Byte array containing data
514
* @param offset Offset to start reading from
515
* @return Float value in host byte order
516
*/
517
public static float readSwappedFloat(byte[] data, int offset);
518
519
/**
520
* Reads little-endian double from byte array
521
* @param data Byte array containing data
522
* @param offset Offset to start reading from
523
* @return Double value in host byte order
524
*/
525
public static double readSwappedDouble(byte[] data, int offset);
526
527
/**
528
* Writes short to byte array in little-endian format
529
* @param data Byte array to write to
530
* @param offset Offset to start writing at
531
* @param value Short value to write
532
*/
533
public static void writeSwappedShort(byte[] data, int offset, short value);
534
535
/**
536
* Writes int to byte array in little-endian format
537
* @param data Byte array to write to
538
* @param offset Offset to start writing at
539
* @param value Int value to write
540
*/
541
public static void writeSwappedInteger(byte[] data, int offset, int value);
542
543
/**
544
* Writes long to byte array in little-endian format
545
* @param data Byte array to write to
546
* @param offset Offset to start writing at
547
* @param value Long value to write
548
*/
549
public static void writeSwappedLong(byte[] data, int offset, long value);
550
551
/**
552
* Swaps byte order of short value
553
* @param value Short value to swap
554
* @return Short with swapped byte order
555
*/
556
public static short swapShort(short value);
557
558
/**
559
* Swaps byte order of int value
560
* @param value Int value to swap
561
* @return Int with swapped byte order
562
*/
563
public static int swapInteger(int value);
564
565
/**
566
* Swaps byte order of long value
567
* @param value Long value to swap
568
* @return Long with swapped byte order
569
*/
570
public static long swapLong(long value);
571
}
572
```
573
574
## Usage Examples
575
576
### Working with TikaInputStream
577
578
```java { .api }
579
// Create TikaInputStream from various sources
580
try (TikaInputStream tis = TikaInputStream.get(new FileInputStream("document.pdf"))) {
581
// Check if backed by file
582
if (tis.hasFile()) {
583
File file = tis.getFile();
584
System.out.println("File size: " + file.length());
585
}
586
587
// Use mark/reset capabilities
588
if (tis.markSupported()) {
589
tis.mark(1024);
590
byte[] header = new byte[10];
591
tis.read(header);
592
tis.reset(); // Return to marked position
593
}
594
595
// Get current position and length
596
System.out.println("Position: " + tis.getPosition());
597
System.out.println("Length: " + tis.getLength());
598
}
599
600
// Create from URL with temporary file backing
601
try (TikaInputStream tis = TikaInputStream.get(new URL("http://example.com/doc.pdf"))) {
602
// Stream content is downloaded to temporary file
603
File tempFile = tis.getFileThreshold(0);
604
System.out.println("Downloaded to: " + tempFile.getAbsolutePath());
605
}
606
```
607
608
### Temporary Resource Management
609
610
```java { .api }
611
// Use TemporaryResources for automatic cleanup
612
try (TemporaryResources tmp = new TemporaryResources()) {
613
// Create temporary files
614
File tempFile1 = tmp.createTemporaryFile("tika", ".tmp");
615
File tempDir = tmp.createTemporaryDirectory("tika-work");
616
617
// Process documents with temporary storage
618
try (InputStream input = new FileInputStream("large-document.pdf")) {
619
File workFile = tmp.copyToTemporaryFile(input, "work", ".pdf");
620
621
// Use workFile for processing
622
processDocument(workFile);
623
624
System.out.println("Total temp space: " + tmp.getTotalSize() + " bytes");
625
}
626
627
// All temporary files automatically cleaned up when closed
628
}
629
```
630
631
### Stream Copying and Conversion
632
633
```java { .api }
634
// Copy streams efficiently
635
try (InputStream input = new FileInputStream("source.txt");
636
OutputStream output = new FileOutputStream("destination.txt")) {
637
638
long bytesCopied = IOUtils.copy(input, output);
639
System.out.println("Copied " + bytesCopied + " bytes");
640
}
641
642
// Convert stream to string with encoding
643
try (InputStream input = new FileInputStream("text-file.txt")) {
644
String content = IOUtils.toString(input, "UTF-8");
645
System.out.println("Content: " + content);
646
}
647
648
// Read entire stream into byte array
649
try (InputStream input = new FileInputStream("binary-file.dat")) {
650
byte[] data = IOUtils.toByteArray(input);
651
System.out.println("Read " + data.length + " bytes");
652
}
653
```
654
655
### Bounded Stream Processing
656
657
```java { .api }
658
// Limit stream reading to prevent memory issues
659
try (InputStream input = new FileInputStream("huge-file.dat");
660
BoundedInputStream bounded = new BoundedInputStream(input, 1024 * 1024)) { // 1MB limit
661
662
byte[] buffer = new byte[8192];
663
int totalRead = 0;
664
665
while (true) {
666
int read = bounded.read(buffer);
667
if (read == -1 || bounded.isLimitReached()) {
668
break;
669
}
670
totalRead += read;
671
672
// Process buffer data
673
processData(buffer, 0, read);
674
}
675
676
System.out.println("Read " + totalRead + " bytes (limit: " + bounded.getMaxBytes() + ")");
677
}
678
```
679
680
### Filename and Path Utilities
681
682
```java { .api }
683
// Extract filename components
684
String filename = "document.backup.pdf";
685
String extension = FilenameUtils.getExtension(filename); // "pdf"
686
String basename = FilenameUtils.getBaseName(filename); // "document.backup"
687
String nameOnly = FilenameUtils.removeExtension(filename); // "document.backup"
688
689
// Path manipulation
690
String fullPath = "/home/user/documents/file.txt";
691
String name = FilenameUtils.getName(fullPath); // "file.txt"
692
String parent = FilenameUtils.getParent(fullPath); // "/home/user/documents"
693
694
// Split extension
695
String[] parts = FilenameUtils.splitExtension(filename); // ["document.backup", "pdf"]
696
697
// Path concatenation
698
String combined = FilenameUtils.concat("/home/user", "documents/file.txt");
699
```
700
701
### Endian Conversion
702
703
```java { .api }
704
// Read little-endian data from byte array
705
byte[] data = new byte[] {0x12, 0x34, 0x56, 0x78};
706
int littleEndianInt = EndianUtils.readSwappedInteger(data, 0);
707
System.out.println("Value: " + Integer.toHexString(littleEndianInt));
708
709
// Write values in little-endian format
710
byte[] output = new byte[8];
711
EndianUtils.writeSwappedInteger(output, 0, 0x12345678);
712
EndianUtils.writeSwappedInteger(output, 4, 0xABCDEF00);
713
714
// Swap byte order
715
short hostValue = 0x1234;
716
short swapped = EndianUtils.swapShort(hostValue);
717
System.out.println("Original: " + Integer.toHexString(hostValue));
718
System.out.println("Swapped: " + Integer.toHexString(swapped));
719
```
720
721
### Robust Stream Handling
722
723
```java { .api }
724
public class DocumentReader {
725
726
public String readDocument(InputStream input) throws IOException {
727
TemporaryResources tmp = new TemporaryResources();
728
729
try {
730
// Create TikaInputStream with temporary backing
731
TikaInputStream tis = tmp.createTikaInputStream(input);
732
733
// Limit reading to reasonable size
734
BoundedInputStream bounded = new BoundedInputStream(tis, 50 * 1024 * 1024); // 50MB
735
736
// Read content safely
737
StringBuilder content = new StringBuilder();
738
byte[] buffer = new byte[8192];
739
740
while (!bounded.isLimitReached()) {
741
int read = bounded.read(buffer);
742
if (read == -1) break;
743
744
content.append(new String(buffer, 0, read, "UTF-8"));
745
}
746
747
return content.toString();
748
749
} finally {
750
IOUtils.closeQuietly(tmp); // Cleanup all temporary resources
751
}
752
}
753
}
754
```