or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

extraction.md file-operations.md generation.md index.md loading.md utilities.md
tile.json

docs/extraction.md

0

# File Content Extraction

1

2

Extract file contents from ZIP archives in various formats, with support for streaming, progress tracking, and platform-specific optimizations. These APIs provide access to the data of individual files within a loaded ZIP archive.

3

4

## Capabilities

5

6

### Asynchronous Content Extraction

7

8

Extract file content in various output formats asynchronously.

9

10

```javascript { .api }

11

/**

12

* Extract file content in specified format

13

* @param type - Output format type

14

* @param onUpdate - Optional progress callback

15

* @returns Promise resolving to file content in specified format

16

*/

17

async<T extends OutputType>(type: T, onUpdate?: OnUpdateCallback): Promise<OutputByType[T]>;

18

```

19

20

**Usage Examples:**

21

22

```javascript

23

import JSZip from "jszip";

24

25

// Load a ZIP file

26

const zip = await JSZip.loadAsync(zipData);

27

28

// Extract text content

29

const textFile = zip.file("readme.txt");

30

if (textFile) {

31

const content = await textFile.async("string");

32

console.log("Text content:", content);

33

}

34

35

// Extract binary content

36

const imageFile = zip.file("photo.jpg");

37

if (imageFile) {

38

const arrayBuffer = await imageFile.async("arraybuffer");

39

const uint8Array = await imageFile.async("uint8array");

40

const blob = await imageFile.async("blob");

41

}

42

43

// Extract with progress tracking

44

const largeFile = zip.file("large-video.mp4");

45

if (largeFile) {

46

const content = await largeFile.async("arraybuffer", (metadata) => {

47

console.log(`Extracting: ${metadata.percent}%`);

48

if (metadata.currentFile) {

49

console.log(`Processing: ${metadata.currentFile}`);

50

}

51

});

52

}

53

54

// Extract different formats

55

const dataFile = zip.file("data.bin");

56

if (dataFile) {

57

const base64 = await dataFile.async("base64");

58

const binaryString = await dataFile.async("binarystring");

59

const numberArray = await dataFile.async("array");

60

const nodeBuffer = await dataFile.async("nodebuffer"); // Node.js only

61

}

62

```

63

64

### Node.js Stream Extraction

65

66

Extract file content as a Node.js readable stream (`NodeJS.ReadableStream`) for efficient processing of large files.

67

68

```javascript { .api }

69

/**

70

* Extract file content as Node.js ReadableStream

71

* @param type - Stream type (defaults to 'nodebuffer')

72

* @param onUpdate - Optional progress callback

73

* @returns Node.js ReadableStream containing file data

74

*/

75

nodeStream(type?: 'nodebuffer', onUpdate?: OnUpdateCallback): NodeJS.ReadableStream;

76

```

77

78

**Usage Examples:**

79

80

```javascript

81

import JSZip from "jszip";

82

import fs from "fs";

83

84

// Load ZIP file

85

const zip = await JSZip.loadAsync(fs.readFileSync('archive.zip'));

86

87

// Extract large file as stream

88

const videoFile = zip.file("presentation.mp4");

89

if (videoFile) {

90

const stream = videoFile.nodeStream('nodebuffer');

91

92

// Pipe to output file

93

stream.pipe(fs.createWriteStream('extracted-video.mp4'));

94

95

// Handle stream events

96

stream.on('data', (chunk) => {

97

console.log(`Received ${chunk.length} bytes`);

98

});

99

100

stream.on('end', () => {

101

console.log('File extraction completed');

102

});

103

104

stream.on('error', (error) => {

105

console.error('Extraction error:', error);

106

});

107

}

108

109

// Extract with progress tracking

110

const documentFile = zip.file("large-document.pdf");

111

if (documentFile) {

112

const stream = documentFile.nodeStream('nodebuffer', (metadata) => {

113

console.log(`Extraction progress: ${metadata.percent}%`);

114

});

115

116

stream.pipe(fs.createWriteStream('extracted-document.pdf'));

117

}

118

```

119

120

## JSZipObject Properties

121

122

Access metadata and properties of individual files within the ZIP archive.

123

124

```javascript { .api }

125

interface JSZipObject {

126

/** Absolute path of the file within the ZIP */

127

name: string;

128

/** Original, unsanitized filename as stored in the ZIP (may contain path-traversal sequences such as `../`) */

129

unsafeOriginalName?: string;

130

/** Whether this entry is a directory */

131

dir: boolean;

132

/** Last modification date */

133

date: Date;

134

/** File comment */

135

comment: string;

136

/** UNIX file permissions */

137

unixPermissions: number | string | null;

138

/** DOS file permissions */

139

dosPermissions: number | null;

140

/** File options including compression settings */

141

options: JSZipObjectOptions;

142

}

143

144

interface JSZipObjectOptions {

145

/** Compression method used for this file */

146

compression: Compression;

147

}

148

```

149

150

**Usage Examples:**

151

152

```javascript

153

const zip = await JSZip.loadAsync(zipData);

154

155

// Inspect file properties

156

const file = zip.file("document.txt");

157

if (file) {

158

console.log("File metadata:", {

159

name: file.name,

160

isDirectory: file.dir,

161

lastModified: file.date,

162

comment: file.comment,

163

compression: file.options.compression,

164

unixPermissions: file.unixPermissions,

165

dosPermissions: file.dosPermissions

166

});

167

168

// Check for security issues

169

if (file.unsafeOriginalName) {

170

console.warn(`Original name: ${file.unsafeOriginalName}`);

171

console.warn(`Sanitized name: ${file.name}`);

172

}

173

}

174

175

// List all files with metadata

176

zip.forEach((relativePath, file) => {

177

if (!file.dir) {

178

console.log(`File: ${file.name}`);

179

console.log(` Size: ${file._data?.uncompressedSize || 'unknown'}`);

180

console.log(` Compressed: ${file._data?.compressedSize || 'unknown'}`);

181

console.log(` Compression: ${file.options.compression}`);

182

console.log(` Modified: ${file.date.toISOString()}`);

183

184

if (file.comment) {

185

console.log(` Comment: ${file.comment}`);

186

}

187

}

188

});

189

```

190

191

## Output Format Types

192

193

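The output formats available when extracting file content. Each key of `OutputByType` is a valid `type` argument, and its value is the type that `async()` resolves to for that format.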

194

195

```javascript { .api }

196

type OutputType = keyof OutputByType;

197

198

interface OutputByType {

199

base64: string;

200

string: string;

201

text: string;

202

binarystring: string;

203

array: number[];

204

uint8array: Uint8Array;

205

arraybuffer: ArrayBuffer;

206

blob: Blob;

207

nodebuffer: Buffer;

208

}

209

```

210

211

**Usage Examples:**

212

213

```javascript

214

const zip = await JSZip.loadAsync(zipData);

215

const file = zip.file("example.bin");

216

217

if (file) {

218

// Text formats

219

const base64String = await file.async("base64");

220

const textString = await file.async("string");

221

const binaryString = await file.async("binarystring");

222

223

// Binary formats

224

const uint8Array = await file.async("uint8array");

225

const arrayBuffer = await file.async("arraybuffer");

226

const numberArray = await file.async("array");

227

228

// Platform-specific formats

229

const blob = await file.async("blob"); // Browser

230

const buffer = await file.async("nodebuffer"); // Node.js

231

}

232

233

// Choose format based on file type

234

const textFile = zip.file("readme.txt");

235

const imageFile = zip.file("photo.png");

236

const dataFile = zip.file("binary.dat");

237

238

if (textFile) {

239

const text = await textFile.async("string");

240

console.log("Text content:", text);

241

}

242

243

if (imageFile) {

244

// For images, use blob in browser or buffer in Node.js

245

const imageBlob = await imageFile.async("blob");

246

const imageUrl = URL.createObjectURL(imageBlob);

247

}

248

249

if (dataFile) {

250

// For binary data, use ArrayBuffer or Uint8Array

251

const binaryData = await dataFile.async("uint8array");

252

processBinaryData(binaryData);

253

}

254

```

255

256

## Progress Tracking

257

258

Monitor file extraction progress with detailed metadata.

259

260

```javascript { .api }

261

interface JSZipMetadata {

262

/** Completion percentage (0-100) */

263

percent: number;

264

/** Currently processing file path */

265

currentFile: string | null;

266

}

267

268

type OnUpdateCallback = (metadata: JSZipMetadata) => void;

269

```

270

271

**Usage Examples:**

272

273

```javascript

274

const zip = await JSZip.loadAsync(zipData);

275

const largeFile = zip.file("huge-dataset.csv");

276

277

if (largeFile) {

278

// Extract with progress tracking

279

const content = await largeFile.async("string", (metadata) => {

280

// Update UI progress bar

281

updateProgressBar(metadata.percent);

282

283

// Show current status

284

console.log(`Extracting: ${metadata.percent}%`);

285

286

if (metadata.currentFile) {

287

console.log(`Processing: ${metadata.currentFile}`);

288

}

289

290

// Handle completion

291

if (metadata.percent === 100) {

292

console.log("Extraction completed!");

293

}

294

});

295

296

console.log("Final content length:", content.length);

297

}

298

299

// Progress tracking with streaming

300

const videoFile = zip.file("presentation.mp4");

301

if (videoFile) {

302

const stream = videoFile.nodeStream('nodebuffer', (metadata) => {

303

document.getElementById('status').textContent =

304

`Extracting: ${metadata.percent}% - ${metadata.currentFile || 'Processing...'}`;

305

});

306

307

stream.pipe(fs.createWriteStream('output.mp4'));

308

}

309

```

310

311

## Batch Extraction

312

313

Extract multiple files efficiently with various strategies.

314

315

**Usage Examples:**

316

317

```javascript

318

const zip = await JSZip.loadAsync(zipData);

319

320

// Extract all text files

321

const textFiles = zip.filter((relativePath, file) => {

322

return !file.dir && (relativePath.endsWith('.txt') || relativePath.endsWith('.md'));

323

});

324

325

const extractedTexts = {};

326

for (const file of textFiles) {

327

extractedTexts[file.name] = await file.async("string");

328

}

329

330

// Extract all images as blobs (browser)

331

const imageFiles = zip.filter((relativePath, file) => {

332

return !file.dir && /\.(png|jpg|jpeg|gif)$/i.test(relativePath);

333

});

334

335

const extractedImages = {};

336

for (const file of imageFiles) {

337

extractedImages[file.name] = await file.async("blob");

338

}

339

340

// Parallel extraction with Promise.all

341

const allFiles = zip.filter((relativePath, file) => !file.dir);

342

const fileContents = await Promise.all(

343

allFiles.map(async (file) => ({

344

name: file.name,

345

content: await file.async("uint8array"),

346

metadata: {

347

date: file.date,

348

size: file._data?.uncompressedSize,

349

compression: file.options.compression

350

}

351

}))

352

);

353

354

// Extract with size limits

355

const MAX_FILE_SIZE = 10 * 1024 * 1024; // 10MB

356

const safeFiles = zip.filter((relativePath, file) => {

357

const size = file._data?.uncompressedSize || 0;

358

return !file.dir && size <= MAX_FILE_SIZE;

359

});

360

361

const safeExtractions = {};

362

for (const file of safeFiles) {

363

safeExtractions[file.name] = await file.async("string");

364

}

365

```

366

367

## Error Handling

368

369

Handle extraction errors and validation failures.

370

371

**Usage Examples:**

372

373

```javascript

374

const zip = await JSZip.loadAsync(zipData);

375

376

// Handle missing files

377

const file = zip.file("might-not-exist.txt");

378

if (file) {

379

try {

380

const content = await file.async("string");

381

console.log("Content:", content);

382

} catch (error) {

383

console.error("Extraction failed:", error.message);

384

}

385

} else {

386

console.log("File not found in ZIP");

387

}

388

389

// Handle corrupted files

390

try {

391

const suspiciousFile = zip.file("suspicious.bin");

392

if (suspiciousFile) {

393

const content = await suspiciousFile.async("uint8array");

394

console.log("Extracted successfully");

395

}

396

} catch (error) {

397

if (error.message.includes("CRC32")) {

398

console.error("File is corrupted (CRC mismatch)");

399

} else if (error.message.includes("compression")) {

400

console.error("Unsupported compression method");

401

} else {

402

console.error("Unknown extraction error:", error.message);

403

}

404

}

405

406

// Robust extraction with fallbacks

407

async function safeExtractText(zip, filename) {

408

const file = zip.file(filename);

409

if (!file) {

410

return null;

411

}

412

413

try {

414

return await file.async("string");

415

} catch (error) {

416

console.warn(`Failed to extract ${filename} as text:`, error.message);

417

418

try {

419

// Try binary extraction as fallback

420

const binary = await file.async("uint8array");

421

return new TextDecoder('utf-8', { fatal: false }).decode(binary);

422

} catch (fallbackError) {

423

console.error(`Complete extraction failure for ${filename}:`, fallbackError.message);

424

return null;

425

}

426

}

427

}

428

```