JavaScript SDK for Firecrawl API that enables comprehensive web scraping, crawling, and data extraction with AI-ready output formats.
—
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Pending
The risk profile of this skill
LLM-powered structured data extraction using natural language prompts, schemas, or AI agents for intelligent content processing.
/**
 * Start an extract job (async) without waiting for it to finish.
 *
 * The polling example in this document reads `id` from the resolved response
 * and hands it to `getExtractStatus` to follow the job.
 *
 * @param args - Extraction request configuration
 * @returns Promise resolving to job ID or processing state
 */
startExtract(args: ExtractRequest): Promise<ExtractResponse>;
/**
 * Get extract job status/data.
 *
 * Resolves to the same `ExtractResponse` shape as `startExtract`; the polling
 * example in this document keeps calling it while `status` is `'processing'`.
 *
 * @param jobId - Extract job identifier (the `id` of a `startExtract` response)
 * @returns Promise resolving to extraction results
 */
getExtractStatus(jobId: string): Promise<ExtractResponse>;
/**
 * Convenience waiter: start extract and poll until completion.
 *
 * @param args - Extraction request plus waiter controls. `pollInterval` and
 *   `timeout` are optional members of this single argument object.
 *   NOTE(review): examples in this document annotate `timeout: 180` as
 *   "3 minutes", which suggests the value is in seconds; the unit of
 *   `pollInterval` is not shown here — confirm against the SDK.
 * @returns Promise resolving to final extract response
 */
extract(args: ExtractRequest & { pollInterval?: number; timeout?: number }): Promise<ExtractResponse>;
// Note: The exact ExtractRequest interface is inferred from method signatures.
// Based on the v1 API, here are the typical extraction parameters:
/**
 * Options accepted by `startExtract` and `extract`.
 * Every member is optional.
 */
interface ExtractRequest {
  /** URLs to extract data from. */
  urls?: string[];
  /** Natural language extraction prompt. */
  prompt?: string;
  /** Structured schema for extraction: a plain schema object or a Zod type. */
  schema?: Record<string, unknown> | ZodTypeAny;
  /** System prompt for AI context. */
  systemPrompt?: string;
  /** Allow external link following. */
  allowExternalLinks?: boolean;
  /** Enable web search for additional context. */
  enableWebSearch?: boolean;
  /** Include subdomains in extraction. */
  includeSubdomains?: boolean;
  /** Source origin tracking. */
  origin?: string;
  /** Show source URLs in results. */
  showSources?: boolean;
  /** Scraping options for URL processing. */
  scrapeOptions?: ScrapeOptions;
  /** AI agent configuration. */
  agent?: {
    model?: string;
    sessionId?: string;
  };
}

/**
 * Result shape shared by `startExtract`, `getExtractStatus` and `extract`.
 * Every member is optional, so callers should check before use.
 */
interface ExtractResponse {
  success?: boolean;
  /** Job identifier; the polling example passes it to `getExtractStatus`. */
  id?: string;
  status?: 'processing' | 'completed' | 'failed' | 'cancelled';
  /** Extracted payload; declared `unknown`, so narrow or validate it before use. */
  data?: unknown;
  error?: string;
  warning?: string;
  /** Logged by the examples that set `showSources: true`. */
  sources?: Record<string, unknown>;
  /** Presumably an expiry timestamp for the job result — TODO confirm format. */
  expiresAt?: string;
}
// Extract structured data using natural language
// Prompt-only request: no schema is supplied, the prompt alone describes the fields.
const aboutPageRequest = {
  urls: ['https://company.example.com/about'],
  prompt: 'Extract the company name, founding year, number of employees, and main business areas',
};
const extractResult = await app.extract(aboutPageRequest);
console.log('Extracted data:', extractResult.data);
// Returns structured data based on the prompt

// Schema-based extraction. The import below was previously fused into the
// comment above, which left `z` undefined for every schema in this document.
import { z } from 'zod';

// Define extraction schema
const CompanySchema = z.object({
  name: z.string(),
  foundingYear: z.number(),
  employees: z.number().optional(),
  industry: z.string(),
  headquarters: z.string(),
  revenue: z.string().optional(),
  products: z.array(z.string()),
  keyExecutives: z.array(z.object({
    name: z.string(),
    title: z.string()
  }))
});

// Several pages of the same site are passed together with one schema and one prompt.
const extractResult = await app.extract({
  urls: [
    'https://company.example.com/about',
    'https://company.example.com/leadership',
    'https://company.example.com/products'
  ],
  schema: CompanySchema,
  prompt: 'Extract comprehensive company information including leadership and product details'
});
// ExtractResponse declares `data` as `unknown`, so the result is not statically
// typed by CompanySchema; validate it (for example with CompanySchema.safeParse)
// before relying on its shape.
console.log('Company data:', extractResult.data);

// Product pages that share one schema.
const productUrls = [
  'https://shop.example.com/products/laptop-pro',
  'https://shop.example.com/products/tablet-air',
  'https://shop.example.com/products/phone-max',
];

// Fields requested for each product; rating and reviews may be absent.
const ProductSchema = z.object({
  name: z.string(),
  price: z.number(),
  currency: z.string(),
  description: z.string(),
  specifications: z.record(z.string()),
  availability: z.enum(['in-stock', 'out-of-stock', 'pre-order']),
  rating: z.number().optional(),
  reviews: z.number().optional(),
  images: z.array(z.string()),
  category: z.string(),
  brand: z.string(),
});

// showSources asks for the source URLs alongside the extracted data.
const productExtractRequest = {
  urls: productUrls,
  schema: ProductSchema,
  prompt: 'Extract comprehensive product information including pricing, specifications, and availability',
  showSources: true,
};
const extractResult = await app.extract(productExtractRequest);
console.log('Products extracted:', extractResult.data);
console.log('Source URLs:', extractResult.sources);

// News analysis: a schema plus a system prompt that sets the analyst role.
// (Like the other snippets here, this one declares its own `extractResult`.)
const NewsArticleSchema = z.object({
  headline: z.string(),
  summary: z.string(),
  mainPoints: z.array(z.string()),
  author: z.string().optional(),
  publishDate: z.string(),
  source: z.string(),
  sentiment: z.enum(['positive', 'negative', 'neutral']),
  topics: z.array(z.string()),
  keyQuotes: z.array(z.string()),
  relatedCompanies: z.array(z.string()),
  impact: z.string().optional(),
});
const newsUrls = [
  'https://news.example.com/tech-breakthrough',
  'https://news.example.com/market-analysis',
  'https://news.example.com/industry-trends',
];
const newsSystemPrompt =
  'You are a business analyst extracting key insights from news articles. Focus on factual information and business implications.';
const extractResult = await app.extract({
  urls: newsUrls,
  schema: NewsArticleSchema,
  prompt: 'Analyze news articles for key information, sentiment, and business impact',
  systemPrompt: newsSystemPrompt,
  enableWebSearch: true,
  showSources: true,
});

// Research papers: external links may be followed and pages are scraped as
// main-content markdown.
const ResearchPaperSchema = z.object({
  title: z.string(),
  authors: z.array(z.string()),
  abstract: z.string(),
  methodology: z.string(),
  keyFindings: z.array(z.string()),
  conclusions: z.string(),
  futureWork: z.string().optional(),
  citations: z.array(z.string()),
  keywords: z.array(z.string()),
  publishedDate: z.string().optional(),
  journal: z.string().optional(),
  doi: z.string().optional(),
});
const paperUrls = [
  'https://research.example.com/papers/ai-ethics',
  'https://research.example.com/papers/machine-learning-bias',
];
const paperSystemPrompt =
  'You are an academic researcher extracting structured information from research papers. Focus on scientific accuracy and completeness.';
const extractResult = await app.extract({
  urls: paperUrls,
  schema: ResearchPaperSchema,
  prompt: 'Extract comprehensive research paper information including methodology, findings, and citations',
  systemPrompt: paperSystemPrompt,
  allowExternalLinks: true,
  scrapeOptions: {
    formats: ['markdown'],
    onlyMainContent: true,
  },
});
// Start extraction job
// Asynchronous flow: start the job, then poll its status manually.
// The schema here is a plain JSON-schema-style object rather than a Zod type.
const extractResponse = await app.startExtract({
  urls: Array.from({ length: 50 }, (_, i) =>
    `https://reviews.example.com/product/${i + 1}`
  ),
  schema: {
    type: 'object',
    properties: {
      productName: { type: 'string' },
      rating: { type: 'number' },
      reviewText: { type: 'string' },
      reviewer: { type: 'string' },
      reviewDate: { type: 'string' },
      pros: { type: 'array', items: { type: 'string' } },
      cons: { type: 'array', items: { type: 'string' } },
      recommended: { type: 'boolean' }
    }
  },
  prompt: 'Extract detailed product review information including pros, cons, and recommendations'
});

// `id` is optional on ExtractResponse; without it there is nothing to poll,
// so fail fast instead of silencing the compiler with a non-null assertion.
const jobId = extractResponse.id;
if (!jobId) {
  throw new Error('startExtract did not return a job id');
}
console.log(`Started extraction job: ${jobId}`);

// Monitor progress
let result: ExtractResponse;
do {
  await new Promise(resolve => setTimeout(resolve, 5000)); // Wait 5 seconds
  result = await app.getExtractStatus(jobId);
  console.log(`Extraction status: ${result.status}`);
} while (result.status === 'processing');

// Any status other than 'completed' (failed, cancelled, or missing) takes the error branch.
if (result.status === 'completed') {
  console.log('Extraction completed:', result.data);
} else {
  console.error('Extraction failed:', result.error);
}

// Web-search-driven extraction: no URLs are given, only a prompt and a schema.
const extractResult = await app.extract({
  prompt: 'Find information about recent AI safety research developments, key researchers, and policy recommendations',
  enableWebSearch: true,
  schema: {
    type: 'object',
    properties: {
      recentDevelopments: {
        type: 'array',
        items: {
          type: 'object',
          properties: {
            title: { type: 'string' },
            description: { type: 'string' },
            researchers: { type: 'array', items: { type: 'string' } },
            institution: { type: 'string' },
            date: { type: 'string' },
            significance: { type: 'string' }
          }
        }
      },
      keyResearchers: {
        type: 'array',
        items: {
          type: 'object',
          properties: {
            name: { type: 'string' },
            affiliation: { type: 'string' },
            expertise: { type: 'array', items: { type: 'string' } },
            recentWork: { type: 'string' }
          }
        }
      },
      policyRecommendations: {
        type: 'array',
        items: {
          type: 'object',
          properties: {
            recommendation: { type: 'string' },
            rationale: { type: 'string' },
            source: { type: 'string' }
          }
        }
      }
    }
  },
  showSources: true,
  // extract() takes a single argument object; waiter controls such as
  // `timeout` are members of it, not a separate second argument.
  timeout: 300 // 5 minutes
});
console.log('AI safety research analysis:', extractResult.data);

// Financial data: one schema applied to several investor-facing sources.
const FinancialDataSchema = z.object({
  companyName: z.string(),
  ticker: z.string().optional(),
  currentPrice: z.number().optional(),
  marketCap: z.string().optional(),
  revenue: z.string(),
  netIncome: z.string(),
  eps: z.number().optional(),
  peRatio: z.number().optional(),
  dividendYield: z.number().optional(),
  quarterlyGrowth: z.string().optional(),
  keyMetrics: z.record(z.string()),
  riskFactors: z.array(z.string()),
  businessSegments: z.array(z.object({
    segment: z.string(),
    revenue: z.string(),
    percentage: z.number().optional(),
  })),
});
const financialUrls = [
  'https://investor.example.com/financials',
  'https://finance.yahoo.com/quote/EXAMPLE',
  'https://www.sec.gov/example-10k',
];
const extractResult = await app.extract({
  urls: financialUrls,
  schema: FinancialDataSchema,
  prompt: 'Extract comprehensive financial data including revenue, profitability, key metrics, and risk factors',
  systemPrompt: 'You are a financial analyst extracting key financial metrics and business information. Focus on numerical accuracy and current data.',
  allowExternalLinks: true,
  scrapeOptions: {
    formats: ['markdown'],
    // NOTE(review): 30000 here versus 180/300 annotated as minutes on extract()
    // suggests this timeout uses a different unit (ms vs s) — confirm.
    timeout: 30000,
  },
});

// Error handling with a simpler fallback request.
// NOTE(review): ComplexSchema is not defined anywhere in this document as shown;
// it stands for a schema supplied by the caller.
try {
  const extractResult = await app.extract({
    urls: ['https://complex-site.example.com'],
    schema: ComplexSchema,
    prompt: 'Extract detailed information',
    timeout: 180, // 3 minutes
  });
  const { success, data, warning, sources } = extractResult;

  if (!(success && data)) {
    console.error('Extraction failed:', extractResult.error);
  } else if (typeof data !== 'object') {
    // Validate extracted data: anything that is not an object is unexpected.
    // `data` is already known to be truthy here, so it cannot be null.
    console.log('Extraction returned unexpected data format');
  } else {
    console.log('Extraction successful:', data);
    if (warning) {
      console.log('Warning:', warning);
    }
    if (sources) {
      console.log('Sources used:', sources);
    }
  }
} catch (error) {
  console.error('Extraction error:', error);
  // Fallback to simpler extraction
  try {
    const fallbackResult = await app.extract({
      urls: ['https://simple-fallback.example.com'],
      prompt: 'Extract basic information',
      timeout: 60,
    });
    console.log('Fallback extraction:', fallbackResult.data);
  } catch (fallbackError) {
    console.error('Fallback extraction also failed:', fallbackError);
  }
}

// Agent-assisted extraction of API documentation.
const extractResult = await app.extract({
  urls: ['https://technical-docs.example.com'],
  prompt: 'Extract API documentation including endpoints, parameters, and examples',
  schema: {
    type: 'object',
    properties: {
      endpoints: {
        type: 'array',
        items: {
          type: 'object',
          properties: {
            method: { type: 'string' },
            path: { type: 'string' },
            description: { type: 'string' },
            parameters: { type: 'array', items: { type: 'object' } },
            responses: { type: 'object' },
            examples: { type: 'array', items: { type: 'string' } },
          },
        },
      },
    },
  },
  // NOTE(review): confirm that 'gpt-4' is an accepted agent model; Firecrawl's
  // published extract docs describe an agent model named 'FIRE-1' — verify.
  agent: {
    model: 'gpt-4',
    sessionId: 'api-docs-extraction-session',
  },
  scrapeOptions: {
    formats: ['markdown'],
    onlyMainContent: true,
  },
});