or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

configuration.md content-processing.md detection.md embedded-extraction.md embedding.md exceptions.md index.md io-utilities.md language.md metadata.md mime-types.md parsing.md pipes.md process-forking.md rendering.md

docs/io-utilities.md

0

# I/O Utilities

1

2

This module provides I/O utilities for robust document processing: enhanced input streams, temporary resource management, filename manipulation, and endian conversion.

3

4

## Capabilities

5

6

### Enhanced Input Streams

7

8

#### TikaInputStream

9

10

Enhanced input stream wrapper providing file backing, mark/reset capabilities, and metadata extraction support.

11

12

```java { .api }

13

/**

14

* Enhanced input stream with file backing and metadata extraction capabilities

15

*/

16

public class TikaInputStream extends ProxyInputStream {

17

/**

18

* Wraps existing InputStream with TikaInputStream capabilities

19

* @param stream InputStream to wrap

20

* @return TikaInputStream wrapping the input stream

21

*/

22

public static TikaInputStream get(InputStream stream);

23

24

/**

25

* Creates TikaInputStream from File

26

* @param file File to create stream from

27

* @return TikaInputStream backed by the file

28

* @throws FileNotFoundException if file doesn't exist

29

*/

30

public static TikaInputStream get(File file) throws FileNotFoundException;

31

32

/**

33

* Creates TikaInputStream from Path

34

* @param path Path to create stream from

35

* @return TikaInputStream backed by the path

36

* @throws IOException if path cannot be accessed

37

*/

38

public static TikaInputStream get(Path path) throws IOException;

39

40

/**

41

* Creates TikaInputStream from URL

42

* @param url URL to create stream from

43

* @return TikaInputStream backed by URL content

44

* @throws IOException if URL cannot be accessed

45

*/

46

public static TikaInputStream get(URL url) throws IOException;

47

48

/**

49

* Creates TikaInputStream from byte array

50

* @param data Byte array containing data

51

* @return TikaInputStream backed by byte array

52

*/

53

public static TikaInputStream get(byte[] data);

54

55

/**

56

* Checks if stream is backed by a file

57

* @return true if stream has file backing

58

*/

59

public boolean hasFile();

60

61

/**

62

* Gets the backing file if available

63

* @return File backing this stream, or null if no file backing

64

*/

65

public File getFile();

66

67

/**

68

* Gets the file path if available

69

* @return Path backing this stream, or null if no path backing

70

*/

71

public Path getPath();

72

73

/**

74

* Gets or creates a temporary file containing stream data

75

* @return File containing all stream data

76

* @throws IOException if temporary file cannot be created

77

*/

78

public File getFileThreshold(int threshold) throws IOException;

79

80

/**

81

* Gets the length of the stream if known

82

* @return Stream length in bytes, or -1 if unknown

83

*/

84

public long getLength();

85

86

/**

87

* Gets the current position in the stream

88

* @return Current position in bytes from start

89

*/

90

public long getPosition();

91

92

/**

93

* Sets mark supported flag

94

* @param markSupported Whether mark/reset should be supported

95

*/

96

public void setMarkSupported(boolean markSupported);

97

98

/**

99

* Checks if mark/reset is supported

100

* @return true if mark/reset operations are supported

101

*/

102

@Override

103

public boolean markSupported();

104

105

/**

106

* Marks current position in stream

107

* @param readLimit Maximum bytes that can be read before mark becomes invalid

108

*/

109

@Override

110

public void mark(int readLimit);

111

112

/**

113

* Resets stream to marked position

114

* @throws IOException if reset is not supported or mark is invalid

115

*/

116

@Override

117

public void reset() throws IOException;

118

119

/**

120

* Reads specified number of bytes from current position

121

* @param buffer Buffer to read into

122

* @param offset Offset in buffer to start writing

123

* @param length Maximum number of bytes to read

124

* @return Number of bytes actually read, or -1 if end of stream

125

* @throws IOException if read operation fails

126

*/

127

@Override

128

public int read(byte[] buffer, int offset, int length) throws IOException;

129

130

/**

131

* Skips specified number of bytes

132

* @param n Number of bytes to skip

133

* @return Number of bytes actually skipped

134

* @throws IOException if skip operation fails

135

*/

136

@Override

137

public long skip(long n) throws IOException;

138

139

/**

140

* Closes stream and releases resources

141

* @throws IOException if close operation fails

142

*/

143

@Override

144

public void close() throws IOException;

145

}

146

```

147

148

#### BoundedInputStream

149

150

Input stream wrapper that limits the number of bytes that can be read from the underlying stream.

151

152

```java { .api }

153

/**

154

* Input stream that limits reading to specified number of bytes

155

*/

156

public class BoundedInputStream extends ProxyInputStream {

157

/**

158

* Creates bounded input stream with maximum read limit

159

* @param stream Underlying input stream

160

* @param maxBytes Maximum number of bytes to read

161

*/

162

public BoundedInputStream(InputStream stream, long maxBytes);

163

164

/**

165

* Gets the maximum number of bytes that can be read

166

* @return Maximum byte limit for this stream

167

*/

168

public long getMaxBytes();

169

170

/**

171

* Gets the number of bytes read so far

172

* @return Number of bytes read from start

173

*/

174

public long getBytesRead();

175

176

/**

177

* Gets the number of remaining bytes that can be read

178

* @return Remaining bytes before limit is reached

179

*/

180

public long getRemainingBytes();

181

182

/**

183

* Checks if byte limit has been reached

184

* @return true if no more bytes can be read due to limit

185

*/

186

public boolean isLimitReached();

187

188

/**

189

* Reads single byte from stream

190

* @return Byte value (0-255) or -1 if end of stream or limit reached

191

* @throws IOException if read operation fails

192

*/

193

@Override

194

public int read() throws IOException;

195

196

/**

197

* Reads bytes into buffer

198

* @param buffer Buffer to read into

199

* @param offset Offset in buffer to start writing

200

* @param length Maximum number of bytes to read

201

* @return Number of bytes read, or -1 if end of stream or limit reached

202

* @throws IOException if read operation fails

203

*/

204

@Override

205

public int read(byte[] buffer, int offset, int length) throws IOException;

206

207

/**

208

* Skips bytes in stream up to remaining limit

209

* @param n Number of bytes to skip

210

* @return Number of bytes actually skipped

211

* @throws IOException if skip operation fails

212

*/

213

@Override

214

public long skip(long n) throws IOException;

215

}

216

```

217

218

### Temporary Resource Management

219

220

#### TemporaryResources

221

222

Manager for temporary files and resources with automatic cleanup capabilities.

223

224

```java { .api }

225

/**

226

* Manager for temporary files and resources with automatic cleanup

227

*/

228

public class TemporaryResources implements Closeable {

229

/**

230

* Creates new temporary resources manager

231

*/

232

public TemporaryResources();

233

234

/**

235

* Creates temporary file with optional prefix and suffix

236

* @param prefix Prefix for temporary file name

237

* @param suffix Suffix for temporary file name

238

* @return File object for created temporary file

239

* @throws IOException if temporary file cannot be created

240

*/

241

public File createTemporaryFile(String prefix, String suffix) throws IOException;

242

243

/**

244

* Creates temporary file with default naming

245

* @return File object for created temporary file

246

* @throws IOException if temporary file cannot be created

247

*/

248

public File createTemporaryFile() throws IOException;

249

250

/**

251

* Creates temporary directory

252

* @param prefix Prefix for temporary directory name

253

* @return File object for created temporary directory

254

* @throws IOException if temporary directory cannot be created

255

*/

256

public File createTemporaryDirectory(String prefix) throws IOException;

257

258

/**

259

* Registers existing file for cleanup when resources are closed

260

* @param file File to register for automatic cleanup

261

*/

262

public void addToCleanupQueue(File file);

263

264

/**

265

* Creates TikaInputStream with temporary file backing

266

* @param stream Input stream to wrap

267

* @return TikaInputStream with temporary file backing

268

* @throws IOException if temporary file cannot be created

269

*/

270

public TikaInputStream createTikaInputStream(InputStream stream) throws IOException;

271

272

/**

273

* Copies input stream to temporary file

274

* @param stream Input stream to copy

275

* @param prefix Prefix for temporary file name

276

* @param suffix Suffix for temporary file name

277

* @return File containing copied stream data

278

* @throws IOException if copy operation fails

279

*/

280

public File copyToTemporaryFile(InputStream stream, String prefix, String suffix) throws IOException;

281

282

/**

283

* Gets list of all temporary files created

284

* @return List of File objects representing temporary files

285

*/

286

public List<File> getTemporaryFiles();

287

288

/**

289

* Gets total size of all temporary files

290

* @return Total size in bytes of all temporary files

291

*/

292

public long getTotalSize();

293

294

/**

295

* Cleans up all temporary resources

296

* @throws IOException if cleanup fails

297

*/

298

@Override

299

public void close() throws IOException;

300

}

301

```

302

303

### I/O Utility Methods

304

305

#### IOUtils

306

307

Collection of static utility methods for common I/O operations and stream handling.

308

309

```java { .api }

310

/**

311

* Static utility methods for I/O operations and stream handling

312

*/

313

public class IOUtils {

314

/**

315

* Copies all bytes from input stream to output stream

316

* @param input Source input stream

317

* @param output Destination output stream

318

* @return Number of bytes copied

319

* @throws IOException if copy operation fails

320

*/

321

public static long copy(InputStream input, OutputStream output) throws IOException;

322

323

/**

324

* Copies input stream to output stream with buffer size control

325

* @param input Source input stream

326

* @param output Destination output stream

327

* @param bufferSize Size of copy buffer in bytes

328

* @return Number of bytes copied

329

* @throws IOException if copy operation fails

330

*/

331

public static long copy(InputStream input, OutputStream output, int bufferSize) throws IOException;

332

333

/**

334

* Copies input stream to writer using specified encoding

335

* @param input Source input stream

336

* @param writer Destination writer

337

* @param encoding Character encoding to use

338

* @throws IOException if copy operation fails

339

*/

340

public static void copy(InputStream input, Writer writer, String encoding) throws IOException;

341

342

/**

343

* Reads all bytes from input stream into byte array

344

* @param input Input stream to read

345

* @return Byte array containing all stream data

346

* @throws IOException if read operation fails

347

*/

348

public static byte[] toByteArray(InputStream input) throws IOException;

349

350

/**

351

* Reads all characters from reader into string

352

* @param reader Reader to read from

353

* @return String containing all reader data

354

* @throws IOException if read operation fails

355

*/

356

public static String toString(Reader reader) throws IOException;

357

358

/**

359

* Reads input stream into string using specified encoding

360

* @param input Input stream to read

361

* @param encoding Character encoding to use

362

* @return String containing stream data

363

* @throws IOException if read operation fails

364

*/

365

public static String toString(InputStream input, String encoding) throws IOException;

366

367

/**

368

* Quietly closes closeable object without throwing exceptions

369

* @param closeable Object to close (may be null)

370

*/

371

public static void closeQuietly(Closeable closeable);

372

373

/**

374

* Quietly closes multiple closeable objects

375

* @param closeables Array of objects to close

376

*/

377

public static void closeQuietly(Closeable... closeables);

378

379

/**

380

* Skips exactly the specified number of bytes from input stream

381

* @param input Input stream to skip from

382

* @param toSkip Number of bytes to skip

383

* @throws IOException if skip operation fails or reaches end of stream

384

*/

385

public static void skipFully(InputStream input, long toSkip) throws IOException;

386

387

/**

388

* Reads exactly the specified number of bytes from input stream

389

* @param input Input stream to read from

390

* @param buffer Buffer to read into

391

* @param offset Offset in buffer to start writing

392

* @param length Number of bytes to read

393

* @throws IOException if read fails or reaches end of stream prematurely

394

*/

395

public static void readFully(InputStream input, byte[] buffer, int offset, int length) throws IOException;

396

}

397

```

398

399

### Filename Utilities

400

401

#### FilenameUtils

402

403

Utilities for filename manipulation, extension extraction, and path handling.

404

405

```java { .api }

406

/**

407

* Utilities for filename and path manipulation

408

*/

409

public class FilenameUtils {

410

/**

411

* Extracts file extension from filename

412

* @param filename Filename to extract extension from

413

* @return File extension without dot, or empty string if no extension

414

*/

415

public static String getExtension(String filename);

416

417

/**

418

* Gets basename of file without extension

419

* @param filename Filename to get basename from

420

* @return Filename without extension

421

*/

422

public static String getBaseName(String filename);

423

424

/**

425

* Gets filename without path components

426

* @param path Full path string

427

* @return Filename component only

428

*/

429

public static String getName(String path);

430

431

/**

432

* Gets parent directory path

433

* @param path Full path string

434

* @return Parent directory path, or null if no parent

435

*/

436

public static String getParent(String path);

437

438

/**

439

* Normalizes path separators to system format

440

* @param path Path to normalize

441

* @return Path with normalized separators

442

*/

443

public static String normalize(String path);

444

445

/**

446

* Removes extension from filename

447

* @param filename Filename to remove extension from

448

* @return Filename without extension

449

*/

450

public static String removeExtension(String filename);

451

452

/**

453

* Checks if path is absolute

454

* @param path Path to check

455

* @return true if path is absolute

456

*/

457

public static boolean isAbsolute(String path);

458

459

/**

460

* Concatenates paths with proper separators

461

* @param basePath Base path

462

* @param relativePath Relative path to append

463

* @return Combined path string

464

*/

465

public static String concat(String basePath, String relativePath);

466

467

/**

468

* Splits filename into name and extension parts

469

* @param filename Filename to split

470

* @return Array containing [basename, extension]

471

*/

472

public static String[] splitExtension(String filename);

473

}

474

```

475

476

### Endian Conversion Utilities

477

478

#### EndianUtils

479

480

Utilities for converting between little-endian and big-endian byte representations.

481

482

```java { .api }

483

/**

484

* Utilities for endian conversion and byte order manipulation

485

*/

486

public class EndianUtils {

487

/**

488

* Reads little-endian short from byte array

489

* @param data Byte array containing data

490

* @param offset Offset to start reading from

491

* @return Short value in host byte order

492

*/

493

public static short readSwappedShort(byte[] data, int offset);

494

495

/**

496

* Reads little-endian int from byte array

497

* @param data Byte array containing data

498

* @param offset Offset to start reading from

499

* @return Int value in host byte order

500

*/

501

public static int readSwappedInteger(byte[] data, int offset);

502

503

/**

504

* Reads little-endian long from byte array

505

* @param data Byte array containing data

506

* @param offset Offset to start reading from

507

* @return Long value in host byte order

508

*/

509

public static long readSwappedLong(byte[] data, int offset);

510

511

/**

512

* Reads little-endian float from byte array

513

* @param data Byte array containing data

514

* @param offset Offset to start reading from

515

* @return Float value in host byte order

516

*/

517

public static float readSwappedFloat(byte[] data, int offset);

518

519

/**

520

* Reads little-endian double from byte array

521

* @param data Byte array containing data

522

* @param offset Offset to start reading from

523

* @return Double value in host byte order

524

*/

525

public static double readSwappedDouble(byte[] data, int offset);

526

527

/**

528

* Writes short to byte array in little-endian format

529

* @param data Byte array to write to

530

* @param offset Offset to start writing at

531

* @param value Short value to write

532

*/

533

public static void writeSwappedShort(byte[] data, int offset, short value);

534

535

/**

536

* Writes int to byte array in little-endian format

537

* @param data Byte array to write to

538

* @param offset Offset to start writing at

539

* @param value Int value to write

540

*/

541

public static void writeSwappedInteger(byte[] data, int offset, int value);

542

543

/**

544

* Writes long to byte array in little-endian format

545

* @param data Byte array to write to

546

* @param offset Offset to start writing at

547

* @param value Long value to write

548

*/

549

public static void writeSwappedLong(byte[] data, int offset, long value);

550

551

/**

552

* Swaps byte order of short value

553

* @param value Short value to swap

554

* @return Short with swapped byte order

555

*/

556

public static short swapShort(short value);

557

558

/**

559

* Swaps byte order of int value

560

* @param value Int value to swap

561

* @return Int with swapped byte order

562

*/

563

public static int swapInteger(int value);

564

565

/**

566

* Swaps byte order of long value

567

* @param value Long value to swap

568

* @return Long with swapped byte order

569

*/

570

public static long swapLong(long value);

571

}

572

```

573

574

## Usage Examples

575

576

### Working with TikaInputStream

577

578

```java { .api }

579

// Create a TikaInputStream by wrapping an existing InputStream

580

try (TikaInputStream tis = TikaInputStream.get(new FileInputStream("document.pdf"))) {

581

// Check if backed by file

582

if (tis.hasFile()) {

583

File file = tis.getFile();

584

System.out.println("File size: " + file.length());

585

}

586

587

// Use mark/reset capabilities

588

if (tis.markSupported()) {

589

tis.mark(1024);

590

byte[] header = new byte[10];

591

tis.read(header);

592

tis.reset(); // Return to marked position

593

}

594

595

// Get current position and length

596

System.out.println("Position: " + tis.getPosition());

597

System.out.println("Length: " + tis.getLength());

598

}

599

600

// Create from URL with temporary file backing

601

try (TikaInputStream tis = TikaInputStream.get(new URL("http://example.com/doc.pdf"))) {

602

// Stream content is downloaded to temporary file

603

File tempFile = tis.getFileThreshold(0);

604

System.out.println("Downloaded to: " + tempFile.getAbsolutePath());

605

}

606

```

607

608

### Temporary Resource Management

609

610

```java { .api }

611

// Use TemporaryResources for automatic cleanup

612

try (TemporaryResources tmp = new TemporaryResources()) {

613

// Create temporary files

614

File tempFile1 = tmp.createTemporaryFile("tika", ".tmp");

615

File tempDir = tmp.createTemporaryDirectory("tika-work");

616

617

// Process documents with temporary storage

618

try (InputStream input = new FileInputStream("large-document.pdf")) {

619

File workFile = tmp.copyToTemporaryFile(input, "work", ".pdf");

620

621

// Use workFile for processing

622

processDocument(workFile);

623

624

System.out.println("Total temp space: " + tmp.getTotalSize() + " bytes");

625

}

626

627

// All temporary files automatically cleaned up when closed

628

}

629

```

630

631

### Stream Copying and Conversion

632

633

```java { .api }

634

// Copy streams efficiently

635

try (InputStream input = new FileInputStream("source.txt");

636

OutputStream output = new FileOutputStream("destination.txt")) {

637

638

long bytesCopied = IOUtils.copy(input, output);

639

System.out.println("Copied " + bytesCopied + " bytes");

640

}

641

642

// Convert stream to string with encoding

643

try (InputStream input = new FileInputStream("text-file.txt")) {

644

String content = IOUtils.toString(input, "UTF-8");

645

System.out.println("Content: " + content);

646

}

647

648

// Read entire stream into byte array

649

try (InputStream input = new FileInputStream("binary-file.dat")) {

650

byte[] data = IOUtils.toByteArray(input);

651

System.out.println("Read " + data.length + " bytes");

652

}

653

```

654

655

### Bounded Stream Processing

656

657

```java { .api }

658

// Limit stream reading to prevent memory issues

659

try (InputStream input = new FileInputStream("huge-file.dat");

660

BoundedInputStream bounded = new BoundedInputStream(input, 1024 * 1024)) { // 1MB limit

661

662

byte[] buffer = new byte[8192];

663

int totalRead = 0;

664

665

while (true) {

666

int read = bounded.read(buffer);

667

if (read == -1 || bounded.isLimitReached()) {

668

break;

669

}

670

totalRead += read;

671

672

// Process buffer data

673

processData(buffer, 0, read);

674

}

675

676

System.out.println("Read " + totalRead + " bytes (limit: " + bounded.getMaxBytes() + ")");

677

}

678

```

679

680

### Filename and Path Utilities

681

682

```java { .api }

683

// Extract filename components

684

String filename = "document.backup.pdf";

685

String extension = FilenameUtils.getExtension(filename); // "pdf"

686

String basename = FilenameUtils.getBaseName(filename); // "document.backup"

687

String nameOnly = FilenameUtils.removeExtension(filename); // "document.backup"

688

689

// Path manipulation

690

String fullPath = "/home/user/documents/file.txt";

691

String name = FilenameUtils.getName(fullPath); // "file.txt"

692

String parent = FilenameUtils.getParent(fullPath); // "/home/user/documents"

693

694

// Split extension

695

String[] parts = FilenameUtils.splitExtension(filename); // ["document.backup", "pdf"]

696

697

// Path concatenation

698

String combined = FilenameUtils.concat("/home/user", "documents/file.txt");

699

```

700

701

### Endian Conversion

702

703

```java { .api }

704

// Read little-endian data from byte array

705

byte[] data = new byte[] {0x12, 0x34, 0x56, 0x78};

706

int littleEndianInt = EndianUtils.readSwappedInteger(data, 0);

707

System.out.println("Value: " + Integer.toHexString(littleEndianInt));

708

709

// Write values in little-endian format

710

byte[] output = new byte[8];

711

EndianUtils.writeSwappedInteger(output, 0, 0x12345678);

712

EndianUtils.writeSwappedInteger(output, 4, 0xABCDEF00);

713

714

// Swap byte order

715

short hostValue = 0x1234;

716

short swapped = EndianUtils.swapShort(hostValue);

717

System.out.println("Original: " + Integer.toHexString(hostValue));

718

System.out.println("Swapped: " + Integer.toHexString(swapped));

719

```

720

721

### Robust Stream Handling

722

723

```java { .api }

724

public class DocumentReader {

725

726

public String readDocument(InputStream input) throws IOException {

727

TemporaryResources tmp = new TemporaryResources();

728

729

try {

730

// Create TikaInputStream with temporary backing

731

TikaInputStream tis = tmp.createTikaInputStream(input);

732

733

// Limit reading to reasonable size

734

BoundedInputStream bounded = new BoundedInputStream(tis, 50 * 1024 * 1024); // 50MB

735

736

// Read content safely

737

StringBuilder content = new StringBuilder();

738

byte[] buffer = new byte[8192];

739

740

while (!bounded.isLimitReached()) {

741

int read = bounded.read(buffer);

742

if (read == -1) break;

743

744

content.append(new String(buffer, 0, read, "UTF-8"));

745

}

746

747

return content.toString();

748

749

} finally {

750

IOUtils.closeQuietly(tmp); // Cleanup all temporary resources

751

}

752

}

753

}

754

```