Content Addressable aRchive format reader and writer for IPLD data structures.
Review status:
- Best practices: Pending — Does it follow best practices?
- Impact: Pending — No eval scenarios have been run
- Risk: Pending — The risk profile of this skill
Efficient indexing functionality for creating block indices and enabling random access to large CAR files. CarIndexer processes CAR archives to generate location metadata for each block without loading block data into memory.
Provides efficient indexing of CAR archives with streaming block location generation.
/**
 * Creates block indices for CAR archives.
 *
 * Reads the CAR header and then emits one BlockIndex entry per block,
 * recording location metadata without loading block data into memory.
 * Implements AsyncIterable<BlockIndex> for streaming index generation;
 * iteration consumes the underlying source, so each instance can only be
 * iterated once — create a new indexer to iterate again.
 */
class CarIndexer {
/** CAR version number (1 or 2) */
readonly version: number;
/** Resolve the list of root CIDs declared in the CAR header */
getRoots(): Promise<CID[]>;
/** Stream the BlockIndex entries for every block in the CAR */
[Symbol.asyncIterator](): AsyncIterator<BlockIndex>;
/** Create an indexer over a complete in-memory CAR */
static fromBytes(bytes: Uint8Array): Promise<CarIndexer>;
/** Create an indexer over a streamed CAR (more memory efficient for large files) */
static fromIterable(asyncIterable: AsyncIterable<Uint8Array>): Promise<CarIndexer>;
}
/**
 * Location and size metadata for a single block within a CAR archive.
 * All offsets and lengths are in bytes.
 */
interface BlockIndex {
/** CID of the block */
cid: CID;
/** Total length of the entry, including the CID encoding */
length: number;
/** Length of the block data only (excludes the CID) */
blockLength: number;
/** Byte offset of the entire block entry in the CAR */
offset: number;
/** Byte offset of the block data (immediately after the CID) in the CAR */
blockOffset: number;
}
Usage Examples:
import { CarIndexer } from "@ipld/car/indexer";
import fs from 'fs';

// Index a complete in-memory CAR
const carBytes = fs.readFileSync('archive.car');
const indexer = await CarIndexer.fromBytes(carBytes);

// Index a streamed CAR (more memory efficient)
const stream = fs.createReadStream('large-archive.car');
const streamIndexer = await CarIndexer.fromIterable(stream);

// Read the root CIDs from the CAR header
const roots = await indexer.getRoots();
console.log(`Indexing CAR with ${roots.length} roots`);

// Walk the block indices as they are produced
for await (const entry of indexer) {
  console.log(`Block ${entry.cid}:`);
  console.log(` Total length: ${entry.length}`);
  console.log(` Block data length: ${entry.blockLength}`);
  console.log(` Starts at byte: ${entry.offset}`);
  console.log(` Block data at byte: ${entry.blockOffset}`);
}
Create lookup maps for random access to blocks by CID.
import { CarIndexer } from "@ipld/car/indexer";
import fs from 'fs';

// Build a complete CID -> location map in one streaming pass
const stream = fs.createReadStream('archive.car');
const indexer = await CarIndexer.fromIterable(stream);

const blockMap = new Map();
let totalBlocks = 0;
let totalBytes = 0;
for await (const { cid, offset, blockOffset, blockLength } of indexer) {
  // Key each block's location info by the CID's string form
  blockMap.set(cid.toString(), { offset, blockOffset, blockLength });
  // Running statistics
  totalBlocks += 1;
  totalBytes += blockLength;
}
console.log(`Indexed ${totalBlocks} blocks, ${totalBytes} total bytes`);

// Random access via the map
const targetCid = someTargetCid;
const location = blockMap.get(targetCid.toString());
if (location) {
  console.log(`Block ${targetCid} found at offset ${location.blockOffset}`);
}
Combine indexing with raw block reading for efficient random access.
import { CarIndexer } from "@ipld/car/indexer";
import { CarReader } from "@ipld/car/reader";
import fs from 'fs';

// Index the CAR via a stream, then read only the target blocks directly
// from an open file descriptor using each block's recorded location.
const fd = await fs.promises.open('large-archive.car', 'r');
const stream = fs.createReadStream('large-archive.car');
const indexer = await CarIndexer.fromIterable(stream);

const targetCids = [cid1, cid2, cid3];
// Stringify the targets once up front; the original re-stringified every
// target for every block (O(blocks * targets) toString calls). A Set also
// makes the "found everything" check correct if targets contain duplicates.
const targetSet = new Set(targetCids.map((cid) => cid.toString()));

const foundBlocks = new Map();
try {
  for await (const blockIndex of indexer) {
    const cidStr = blockIndex.cid.toString();
    if (targetSet.has(cidStr)) {
      // Read only this block's bytes from the file descriptor
      const block = await CarReader.readRaw(fd, blockIndex);
      foundBlocks.set(cidStr, block);
      // Stop early once every distinct target has been found
      if (foundBlocks.size === targetSet.size) {
        break;
      }
    }
  }
} finally {
  // Always release the file descriptor, even if indexing or reading throws
  await fd.close();
}
console.log(`Found ${foundBlocks.size} of ${targetCids.length} target blocks`);
Process large CAR files without loading entire contents into memory.
import { CarIndexer } from "@ipld/car/indexer";
import fs from 'fs';

// Stream a very large CAR file; only index metadata is held in memory
const stream = fs.createReadStream('massive-archive.car');
const indexer = await CarIndexer.fromIterable(stream);

let processedCount = 0;
let processedBytes = 0;
for await (const blockIndex of indexer) {
  // Skip blocks that do not match the filter
  if (!shouldProcessBlock(blockIndex.cid)) {
    continue;
  }
  await processBlockIndex(blockIndex);
  processedCount += 1;
  processedBytes += blockIndex.blockLength;
  // Periodic progress reporting
  if (processedCount % 1000 === 0) {
    console.log(`Processed ${processedCount} blocks, ${processedBytes} bytes`);
  }
}
console.log(`Completed processing: ${processedCount} blocks`);
Common errors when indexing CAR files:
try {
const indexer = await CarIndexer.fromBytes(invalidData);
} catch (error) {
if (error instanceof TypeError) {
console.log('Invalid input format');
} else if (error.message.includes('Invalid CAR')) {
console.log('Malformed CAR file');
}
}
// Iteration can only be performed once
const indexer = await CarIndexer.fromBytes(carBytes);
// First iteration works
for await (const blockIndex of indexer) {
// Process blocks
}
// Second iteration will not work - need new indexer instance
// for await (const blockIndex of indexer) { // Won't iteratefromIterable() with file streams for best memory efficiency