0
# Tar Extraction
1
2
Streaming tar archive parser that extracts entries from tar data without touching the file system. The extract stream processes the archive sequentially and emits an `entry` event for each entry it encounters.
3
4
## Capabilities
5
6
### Extract Factory Function
7
8
Creates a new tar extraction stream.
9
10
```javascript { .api }
11
/**
12
* Creates a new tar extraction stream
13
* @param {ExtractOptions} [opts] - Optional configuration
14
* @returns {Extract} Extract stream instance
15
*/
16
function extract(opts?: ExtractOptions): Extract;
17
18
interface ExtractOptions {
19
/** Character encoding for filenames, defaults to 'utf-8' */
20
filenameEncoding?: string;
21
/** Allow unknown tar formats, defaults to false */
22
allowUnknownFormat?: boolean;
23
}
24
```
25
26
### Extract Stream
27
28
Writable stream that parses tar data and emits entry events.
29
30
```javascript { .api }
31
class Extract extends Writable {
32
/** Async iterator interface for processing entries */
33
[Symbol.asyncIterator](): AsyncIterableIterator<EntryStream>;
34
}
35
```
36
37
### Entry Event
38
39
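Emitted for each tar entry (file, directory, etc.) found in the archive. The handler must consume the entry's stream and then call `next` so that the extractor continues with the following entry.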
40
41
```javascript { .api }
42
/**
43
* Entry event handler
44
* @param {Header} header - Tar header with entry metadata
45
* @param {EntryStream} stream - Readable stream of entry content
46
* @param {Function} next - Callback to proceed to the next entry; may be called with an error to propagate it
47
*/
48
extract.on('entry', function(header: Header, stream: EntryStream, next: (err?: Error) => void): void);
49
```
50
51
### Entry Stream
52
53
Readable stream representing the content of a tar entry.
54
55
```javascript { .api }
56
class EntryStream extends Readable {
57
/** Tar header for this entry */
58
header: Header;
59
/** Byte offset of this entry in the tar archive */
60
offset: number;
61
}
62
```
63
64
### Finish Event
65
66
Emitted when all entries have been processed.
67
68
```javascript { .api }
69
extract.on('finish', function(): void);
70
```
71
72
## Usage Examples
73
74
### Basic Extraction
75
76
```javascript
77
const tar = require('tar-stream');
78
const fs = require('fs');
79
80
const extract = tar.extract();
81
82
extract.on('entry', function(header, stream, next) {
83
console.log('Entry:', header.name, 'Size:', header.size);
84
85
stream.on('end', function() {
86
next(); // ready for next entry
87
});
88
89
stream.resume(); // auto-drain the stream
90
});
91
92
extract.on('finish', function() {
93
console.log('Extraction complete');
94
});
95
96
// Pipe tar data to extractor
97
fs.createReadStream('archive.tar').pipe(extract);
98
```
99
100
### Async Iterator Usage
101
102
```javascript
103
const tar = require('tar-stream');
104
const fs = require('fs');
105
106
async function extractArchive() {
107
const extract = tar.extract();
108
109
// Pipe tar data to extractor
110
fs.createReadStream('archive.tar').pipe(extract);
111
112
// Process entries using async iterator
113
for await (const entry of extract) {
114
console.log('Processing:', entry.header.name);
115
116
// Each iterated entry is itself the readable stream for that entry's content
117
entry.resume(); // drain the stream
118
}
119
}
120
```
121
122
### Content Processing
123
124
```javascript
125
const tar = require('tar-stream');
126
const fs = require('fs');
127
128
const extract = tar.extract();
129
130
extract.on('entry', function(header, stream, next) {
131
if (header.type === 'file' && header.name.endsWith('.txt')) {
132
let content = '';
133
134
stream.on('data', function(chunk) {
135
content += chunk.toString();
136
});
137
138
stream.on('end', function() {
139
console.log('File content:', content);
140
next();
141
});
142
} else {
143
stream.resume(); // skip non-text files
144
stream.on('end', next);
145
}
146
});
147
148
fs.createReadStream('archive.tar').pipe(extract);
149
```
150
151
### Error Handling
152
153
```javascript
154
const tar = require('tar-stream');
155
156
const extract = tar.extract({
157
allowUnknownFormat: true, // allow non-standard tar formats
158
filenameEncoding: 'latin1' // handle non-UTF8 filenames
159
});
160
161
extract.on('error', function(err) {
162
console.error('Extraction error:', err.message);
163
});
164
165
extract.on('entry', function(header, stream, next) {
166
stream.on('error', function(err) {
167
console.error('Entry stream error:', err.message);
168
next(err); // propagate error
169
});
170
171
stream.resume();
172
stream.on('end', next);
173
});
174
```
175
176
## Important Notes
177
178
- **Sequential Processing**: The tar archive is streamed sequentially, so you must drain each entry's stream (and, when using the `entry` event, call `next()`) before the next entry will be processed
179
- **Memory Efficiency**: Streams allow processing large archives without loading entire contents into memory
180
- **Format Support**: Supports USTAR format with pax extended headers for long filenames/paths
181
- **Error Recovery**: Invalid tar headers will cause errors unless `allowUnknownFormat` is enabled