0
# File Content Extraction
1
2
Extract file contents from ZIP archives in various formats with support for streaming, progress tracking, and platform-specific optimizations. Provides access to individual file data within loaded ZIP files.
3
4
## Capabilities
5
6
### Asynchronous Content Extraction
7
8
Extract file content in various output formats asynchronously.
9
10
```javascript { .api }
11
/**
12
* Extract file content in specified format
13
* @param type - Output format type
14
* @param onUpdate - Optional progress callback
15
* @returns Promise resolving to file content in specified format
16
*/
17
async<T extends OutputType>(type: T, onUpdate?: OnUpdateCallback): Promise<OutputByType[T]>;
18
```
19
20
**Usage Examples:**
21
22
```javascript
23
import JSZip from "jszip";
24
25
// Load a ZIP file
26
const zip = await JSZip.loadAsync(zipData);
27
28
// Extract text content
29
const textFile = zip.file("readme.txt");
30
if (textFile) {
31
const content = await textFile.async("string");
32
console.log("Text content:", content);
33
}
34
35
// Extract binary content
36
const imageFile = zip.file("photo.jpg");
37
if (imageFile) {
38
const arrayBuffer = await imageFile.async("arraybuffer");
39
const uint8Array = await imageFile.async("uint8array");
40
const blob = await imageFile.async("blob");
41
}
42
43
// Extract with progress tracking
44
const largeFile = zip.file("large-video.mp4");
45
if (largeFile) {
46
const content = await largeFile.async("arraybuffer", (metadata) => {
47
console.log(`Extracting: ${metadata.percent}%`);
48
if (metadata.currentFile) {
49
console.log(`Processing: ${metadata.currentFile}`);
50
}
51
});
52
}
53
54
// Extract different formats
55
const dataFile = zip.file("data.bin");
56
if (dataFile) {
57
const base64 = await dataFile.async("base64");
58
const binaryString = await dataFile.async("binarystring");
59
const numberArray = await dataFile.async("array");
60
const nodeBuffer = await dataFile.async("nodebuffer"); // Node.js only
61
}
62
```
63
64
### Node.js Stream Extraction
65
66
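Extract file content as a Node.js ReadableStream for efficient processing of large files. This method is only usable in environments with Node.js stream support (check `JSZip.support.nodestream`).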
67
68
```javascript { .api }
69
/**
70
* Extract file content as Node.js ReadableStream
71
* @param type - Stream type (defaults to 'nodebuffer')
72
* @param onUpdate - Optional progress callback
73
* @returns Node.js ReadableStream containing file data
74
*/
75
nodeStream(type?: 'nodebuffer', onUpdate?: OnUpdateCallback): NodeJS.ReadableStream;
76
```
77
78
**Usage Examples:**
79
80
```javascript
81
import JSZip from "jszip";
82
import fs from "fs";
83
84
// Load ZIP file
85
const zip = await JSZip.loadAsync(fs.readFileSync('archive.zip'));
86
87
// Extract large file as stream
88
const videoFile = zip.file("presentation.mp4");
89
if (videoFile) {
90
const stream = videoFile.nodeStream('nodebuffer');
91
92
// Pipe to output file
93
stream.pipe(fs.createWriteStream('extracted-video.mp4'));
94
95
// Handle stream events
96
stream.on('data', (chunk) => {
97
console.log(`Received ${chunk.length} bytes`);
98
});
99
100
stream.on('end', () => {
101
console.log('File extraction completed');
102
});
103
104
stream.on('error', (error) => {
105
console.error('Extraction error:', error);
106
});
107
}
108
109
// Extract with progress tracking
110
const documentFile = zip.file("large-document.pdf");
111
if (documentFile) {
112
const stream = documentFile.nodeStream('nodebuffer', (metadata) => {
113
console.log(`Extraction progress: ${metadata.percent}%`);
114
});
115
116
stream.pipe(fs.createWriteStream('extracted-document.pdf'));
117
}
118
```
119
120
## JSZipObject Properties
121
122
Access metadata and properties of individual files within the ZIP archive.
123
124
```javascript { .api }
125
interface JSZipObject {
126
/** Absolute path of the file within the ZIP */
127
name: string;
128
/** Original, unsanitized filename as stored in the ZIP (may contain path traversal sequences such as "../") */
129
unsafeOriginalName?: string;
130
/** Whether this entry is a directory */
131
dir: boolean;
132
/** Last modification date */
133
date: Date;
134
/** File comment */
135
comment: string;
136
/** UNIX file permissions */
137
unixPermissions: number | string | null;
138
/** DOS file permissions */
139
dosPermissions: number | null;
140
/** File options including compression settings */
141
options: JSZipObjectOptions;
142
}
143
144
interface JSZipObjectOptions {
145
/** Compression method used for this file */
146
compression: Compression;
147
}
148
```
149
150
**Usage Examples:**
151
152
```javascript
153
const zip = await JSZip.loadAsync(zipData);
154
155
// Inspect file properties
156
const file = zip.file("document.txt");
157
if (file) {
158
console.log("File metadata:", {
159
name: file.name,
160
isDirectory: file.dir,
161
lastModified: file.date,
162
comment: file.comment,
163
compression: file.options.compression,
164
unixPermissions: file.unixPermissions,
165
dosPermissions: file.dosPermissions
166
});
167
168
// Check for security issues
169
if (file.unsafeOriginalName && file.unsafeOriginalName !== file.name) {
170
console.warn(`Original name: ${file.unsafeOriginalName}`);
171
console.warn(`Sanitized name: ${file.name}`);
172
}
173
}
174
175
// List all files with metadata (sizes are read from `_data`, an internal field that is not part of the public API)
176
zip.forEach((relativePath, file) => {
177
if (!file.dir) {
178
console.log(`File: ${file.name}`);
179
console.log(` Size: ${file._data?.uncompressedSize || 'unknown'}`);
180
console.log(` Compressed: ${file._data?.compressedSize || 'unknown'}`);
181
console.log(` Compression: ${file.options.compression}`);
182
console.log(` Modified: ${file.date.toISOString()}`);
183
184
if (file.comment) {
185
console.log(` Comment: ${file.comment}`);
186
}
187
}
188
});
189
```
190
191
## Output Format Types
192
193
Various formats for extracting file content.
194
195
```javascript { .api }
196
type OutputType = keyof OutputByType;
197
198
interface OutputByType {
199
base64: string;
200
string: string;
201
text: string;
202
binarystring: string;
203
array: number[];
204
uint8array: Uint8Array;
205
arraybuffer: ArrayBuffer;
206
blob: Blob;
207
nodebuffer: Buffer;
208
}
209
```
210
211
**Usage Examples:**
212
213
```javascript
214
const zip = await JSZip.loadAsync(zipData);
215
const file = zip.file("example.bin");
216
217
if (file) {
218
// Text formats
219
const base64String = await file.async("base64");
220
const textString = await file.async("string");
221
const binaryString = await file.async("binarystring");
222
223
// Binary formats
224
const uint8Array = await file.async("uint8array");
225
const arrayBuffer = await file.async("arraybuffer");
226
const numberArray = await file.async("array");
227
228
// Platform-specific formats
229
const blob = await file.async("blob"); // Browser
230
const buffer = await file.async("nodebuffer"); // Node.js
231
}
232
233
// Choose format based on file type
234
const textFile = zip.file("readme.txt");
235
const imageFile = zip.file("photo.png");
236
const dataFile = zip.file("binary.dat");
237
238
if (textFile) {
239
const text = await textFile.async("string");
240
console.log("Text content:", text);
241
}
242
243
if (imageFile) {
244
// For images, use blob in browser or buffer in Node.js
245
const imageBlob = await imageFile.async("blob");
246
const imageUrl = URL.createObjectURL(imageBlob);
247
}
248
249
if (dataFile) {
250
// For binary data, use ArrayBuffer or Uint8Array
251
const binaryData = await dataFile.async("uint8array");
252
processBinaryData(binaryData);
253
}
254
```
255
256
## Progress Tracking
257
258
Monitor file extraction progress with detailed metadata.
259
260
```javascript { .api }
261
interface JSZipMetadata {
262
/** Completion percentage (0-100) */
263
percent: number;
264
/** Path of the file currently being processed, or null */
265
currentFile: string | null;
266
}
267
268
type OnUpdateCallback = (metadata: JSZipMetadata) => void;
269
```
270
271
**Usage Examples:**
272
273
```javascript
274
const zip = await JSZip.loadAsync(zipData);
275
const largeFile = zip.file("huge-dataset.csv");
276
277
if (largeFile) {
278
// Extract with progress tracking
279
const content = await largeFile.async("string", (metadata) => {
280
// Update UI progress bar
281
updateProgressBar(metadata.percent);
282
283
// Show current status
284
console.log(`Extracting: ${metadata.percent}%`);
285
286
if (metadata.currentFile) {
287
console.log(`Processing: ${metadata.currentFile}`);
288
}
289
290
// Handle completion
291
if (metadata.percent === 100) {
292
console.log("Extraction completed!");
293
}
294
});
295
296
console.log("Final content length:", content.length);
297
}
298
299
// Progress tracking with streaming
300
const videoFile = zip.file("presentation.mp4");
301
if (videoFile) {
302
const stream = videoFile.nodeStream('nodebuffer', (metadata) => {
303
console.log(
303
`Extracting: ${metadata.percent}% - ${metadata.currentFile || 'Processing...'}`);
305
});
306
307
stream.pipe(fs.createWriteStream('output.mp4'));
308
}
309
```
310
311
## Batch Extraction
312
313
Extract multiple files efficiently with various strategies.
314
315
**Usage Examples:**
316
317
```javascript
318
const zip = await JSZip.loadAsync(zipData);
319
320
// Extract all text files
321
const textFiles = zip.filter((relativePath, file) => {
322
return !file.dir && (relativePath.endsWith('.txt') || relativePath.endsWith('.md'));
323
});
324
325
const extractedTexts = {};
326
for (const file of textFiles) {
327
extractedTexts[file.name] = await file.async("string");
328
}
329
330
// Extract all images as blobs (browser)
331
const imageFiles = zip.filter((relativePath, file) => {
332
return !file.dir && /\.(png|jpg|jpeg|gif)$/i.test(relativePath);
333
});
334
335
const extractedImages = {};
336
for (const file of imageFiles) {
337
extractedImages[file.name] = await file.async("blob");
338
}
339
340
// Parallel extraction with Promise.all
341
const allFiles = zip.filter((relativePath, file) => !file.dir);
342
const fileContents = await Promise.all(
343
allFiles.map(async (file) => ({
344
name: file.name,
345
content: await file.async("uint8array"),
346
metadata: {
347
date: file.date,
348
size: file._data?.uncompressedSize,
349
compression: file.options.compression
350
}
351
}))
352
);
353
354
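// Extract with size limits (note: sizes come from the internal `_data` field; entries whose size is unknown are treated as 0 and pass the filter)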
355
const MAX_FILE_SIZE = 10 * 1024 * 1024; // 10MB
356
const safeFiles = zip.filter((relativePath, file) => {
357
const size = file._data?.uncompressedSize || 0;
358
return !file.dir && size <= MAX_FILE_SIZE;
359
});
360
361
const safeExtractions = {};
362
for (const file of safeFiles) {
363
safeExtractions[file.name] = await file.async("string");
364
}
365
```
366
367
## Error Handling
368
369
Handle extraction errors and validation failures.
370
371
**Usage Examples:**
372
373
```javascript
374
const zip = await JSZip.loadAsync(zipData);
375
376
// Handle missing files
377
const file = zip.file("might-not-exist.txt");
378
if (file) {
379
try {
380
const content = await file.async("string");
381
console.log("Content:", content);
382
} catch (error) {
383
console.error("Extraction failed:", error.message);
384
}
385
} else {
386
console.log("File not found in ZIP");
387
}
388
389
// Handle corrupted files (note: JSZip verifies CRC32 only during loadAsync, and only when called with { checkCRC32: true })
390
try {
391
const suspiciousFile = zip.file("suspicious.bin");
392
if (suspiciousFile) {
393
const content = await suspiciousFile.async("uint8array");
394
console.log("Extracted successfully");
395
}
396
} catch (error) {
397
if (error.message.includes("CRC32")) {
398
console.error("File is corrupted (CRC mismatch)");
399
} else if (error.message.includes("compression")) {
400
console.error("Unsupported compression method");
401
} else {
402
console.error("Unknown extraction error:", error.message);
403
}
404
}
405
406
// Robust extraction with fallbacks
407
async function safeExtractText(zip, filename) {
408
const file = zip.file(filename);
409
if (!file) {
410
return null;
411
}
412
413
try {
414
return await file.async("string");
415
} catch (error) {
416
console.warn(`Failed to extract ${filename} as text:`, error.message);
417
418
try {
419
// Try binary extraction as fallback
420
const binary = await file.async("uint8array");
421
return new TextDecoder('utf-8', { fatal: false }).decode(binary);
422
} catch (fallbackError) {
423
console.error(`Complete extraction failure for ${filename}:`, fallbackError.message);
424
return null;
425
}
426
}
427
}
428
```