tessl/npm-mendable--firecrawl-js

JavaScript SDK for Firecrawl API that enables comprehensive web scraping, crawling, and data extraction with AI-ready output formats.

—

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Security

Pending

The risk profile of this skill

Overview

Eval results

Files

Web Scraping

Name: tessl/npm-mendable--firecrawl-js
Author: tessl

Single URL scraping with multiple output formats, browser automation, and structured data extraction.

Core Scraping Method

/**
 * Scrape a single URL with optional format and processing options.
 *
 * Typed overload: used when an options object is supplied. The `json` field
 * of the resolved document is replaced by an optional field whose type is
 * derived from the options through `InferredJsonFromOptions<Opts>`.
 *
 * @param url - Target URL to scrape
 * @param options - Scraping configuration options (required in this overload)
 * @returns Promise resolving to the scraped document with a re-typed, optional `json`
 */
scrape<Opts extends ScrapeOptions>(
  url: string, 
  options: Opts
): Promise<Omit<Document, "json"> & { json?: InferredJsonFromOptions<Opts> }>;

/**
 * General overload: `options` may be omitted, and the result is a plain `Document`.
 *
 * @param url - Target URL to scrape
 * @param options - Optional scraping configuration options
 * @returns Promise resolving to the scraped document
 */
scrape(url: string, options?: ScrapeOptions): Promise<Document>;

Scrape Options

/**
 * Configuration accepted by `scrape`. Every field is optional.
 *
 * NOTE(review): units for the numeric fields (`timeout`, `waitFor`, `maxAge`)
 * are not stated in this declaration — presumably milliseconds; confirm
 * against the service documentation before relying on them.
 */
interface ScrapeOptions {
  // Output formats to include in response
  formats?: FormatOption[];
  
  // HTTP configuration
  headers?: Record<string, string>;
  timeout?: number;
  skipTlsVerification?: boolean;
  // The trailing `| string` means the type checker accepts any string here;
  // the three literals only document the known values.
  proxy?: "basic" | "stealth" | "auto" | string;
  
  // Content filtering
  includeTags?: string[];
  excludeTags?: string[];
  onlyMainContent?: boolean;
  removeBase64Images?: boolean;
  
  // Browser behavior
  mobile?: boolean;
  waitFor?: number;
  fastMode?: boolean;
  blockAds?: boolean;
  
  // Browser automation
  actions?: ActionOption[];
  
  // Document parsing: each entry is either a parser name or a PDF parser
  // configuration object with an optional page limit
  parsers?: Array<string | { type: "pdf"; maxPages?: number }>;
  
  // Location simulation
  location?: LocationConfig;
  
  // Caching
  maxAge?: number;
  storeInCache?: boolean;
  
  // Testing
  useMock?: string;
  
  // Integration tracking
  integration?: string;
}

Format Options

/**
 * Format names that can be requested as bare strings in `ScrapeOptions.formats`.
 * "json", "screenshot", "changeTracking" and "attributes" also exist as
 * configurable object formats whose `type` field carries the same literal.
 */
type FormatString = 
  | "markdown" 
  | "html" 
  | "rawHtml" 
  | "links" 
  | "images" 
  | "screenshot" 
  | "summary"
  | "changeTracking" 
  | "json" 
  | "attributes";

/**
 * One entry of `ScrapeOptions.formats`: either a bare format name or one of
 * the object formats, which are distinguished by their `type` field.
 */
type FormatOption = 
  | FormatString 
  | JsonFormat 
  | ScreenshotFormat 
  | ChangeTrackingFormat 
  | AttributesFormat;

/**
 * JSON extraction format. Both `prompt` and `schema` are optional; the schema
 * may be given as a plain object or as a Zod schema (`ZodTypeAny`).
 */
interface JsonFormat {
  type: "json";
  prompt?: string;
  schema?: Record<string, unknown> | ZodTypeAny;
}

/**
 * Screenshot format with optional capture settings.
 * NOTE(review): the range and meaning of `quality` are not shown here — confirm.
 */
interface ScreenshotFormat {
  type: "screenshot";
  fullPage?: boolean;
  quality?: number;
  // The inline object shape is structurally identical to `Viewport`.
  viewport?: Viewport | { width: number; height: number };
}

/**
 * Change-tracking format. `modes` is the only required setting besides `type`.
 * Unlike `JsonFormat`, `schema` here accepts only a plain object, not a Zod schema.
 */
interface ChangeTrackingFormat {
  type: "changeTracking";
  modes: ("git-diff" | "json")[];
  schema?: Record<string, unknown>;
  prompt?: string;
  tag?: string;
}

/**
 * Attribute extraction format: a list of selector/attribute-name pairs.
 * Both fields of each pair are required.
 */
interface AttributesFormat {
  type: "attributes";
  selectors: Array<{
    selector: string;
    attribute: string;
  }>;
}

Browser Actions

/**
 * One entry of `ScrapeOptions.actions`. Each member interface carries a
 * distinct string literal in its `type` field, so the union can be narrowed
 * by checking `type`.
 */
type ActionOption = 
  | WaitAction 
  | ScreenshotAction 
  | ClickAction 
  | WriteAction 
  | PressAction 
  | ScrollAction 
  | ScrapeAction 
  | ExecuteJavascriptAction 
  | PDFAction;

/**
 * Wait for an element or for a period of time.
 * Both `milliseconds` and `selector` are optional at the type level.
 * NOTE(review): presumably exactly one of them should be supplied — confirm.
 */
interface WaitAction {
  type: "wait";
  milliseconds?: number;
  selector?: string;
}

/** Click action; `selector` is required. */
interface ClickAction {
  type: "click";
  selector: string;
}

/**
 * Type text. The action carries only the text and no selector.
 * NOTE(review): presumably the text goes to the currently focused element — confirm.
 */
interface WriteAction {
  type: "write";
  text: string;
}

/**
 * Press a key.
 * NOTE(review): the accepted key-name vocabulary is not shown here — confirm.
 */
interface PressAction {
  type: "press";
  key: string;
}

/**
 * Scroll action. `direction` is required and limited to "up" or "down";
 * `selector` is optional.
 */
interface ScrollAction {
  type: "scroll";
  direction: "up" | "down";
  selector?: string;
}

/**
 * Take a screenshot as a step in an action sequence.
 * Declares the same fields as `ScreenshotFormat`.
 */
interface ScreenshotAction {
  type: "screenshot";
  fullPage?: boolean;
  quality?: number;
  // The inline object shape is structurally identical to `Viewport`.
  viewport?: Viewport | { width: number; height: number };
}

/** Scrape the current page state; takes no settings beyond `type`. */
interface ScrapeAction {
  type: "scrape";
}

/**
 * Execute JavaScript supplied as source text in `script`.
 * NOTE(review): the execution context and how a result is returned are not
 * shown here — confirm.
 */
interface ExecuteJavascriptAction {
  type: "executeJavascript";
  script: string;
}

/**
 * Generate a PDF. All settings are optional; `format` is restricted to the
 * listed paper-size names.
 */
interface PDFAction {
  type: "pdf";
  format?: "A0" | "A1" | "A2" | "A3" | "A4" | "A5" | "A6" | "Letter" | "Legal" | "Tabloid" | "Ledger";
  landscape?: boolean;
  scale?: number;
}

Location Configuration

/**
 * Location settings used by `ScrapeOptions.location`. Both fields are optional.
 * NOTE(review): the expected code formats (e.g. ISO country codes, language
 * tags) are not shown here — confirm.
 */
interface LocationConfig {
  country?: string;
  languages?: string[];
}

/**
 * Viewport dimensions referenced by the screenshot format and action.
 * NOTE(review): units are presumably pixels — confirm.
 */
interface Viewport {
  width: number;
  height: number;
}

Usage Examples

Basic Scraping

// Simple markdown extraction: request only the markdown format and print it.
const result = await app.scrape('https://example.com', {
  formats: ['markdown']
});
console.log(result.markdown);

// Multiple formats in a single request. A separate binding is used so the
// snippet does not redeclare `result` in the same scope.
const multiFormatResult = await app.scrape('https://example.com', {
  formats: ['markdown', 'html', 'links', 'images']
});

JSON Extraction with Schema

import { z } from 'zod';

// Using Zod schema: describe the expected shape of the extracted data.
const ProductSchema = z.object({
  name: z.string(),
  price: z.number(),
  description: z.string(),
  inStock: z.boolean()
});

const result = await app.scrape('https://shop.example.com/product/123', {
  formats: [{
    type: 'json',
    schema: ProductSchema
  }]
});
// result.json is typed by inference from the schema passed in the options
// (presumably z.infer<typeof ProductSchema> — confirm against
// InferredJsonFromOptions). The field is optional, so check it before use.

// Using JSON schema object: the same format with a plain-object schema.
const result2 = await app.scrape('https://shop.example.com/product/123', {
  formats: [{
    type: 'json',
    schema: {
      type: 'object',
      properties: {
        name: { type: 'string' },
        price: { type: 'number' }
      },
      required: ['name', 'price']
    }
  }]
});

Browser Automation

// Login and scrape protected content.
// The credentials below are placeholders; do not hard-code real credentials
// in source — load them from configuration or a secret store.
// Each `write` is preceded by a `click` on the target field, since the
// write action itself carries only text and no selector.
const result = await app.scrape('https://app.example.com/login', {
  formats: ['markdown'],
  actions: [
    { type: 'wait', selector: '#username' },
    { type: 'click', selector: '#username' },
    { type: 'write', text: 'myuser@example.com' },
    { type: 'click', selector: '#password' },
    { type: 'write', text: 'mypassword' },
    { type: 'click', selector: '#login-button' },
    { type: 'wait', milliseconds: 3000 },
    { type: 'scrape' }
  ]
});

Screenshot with Custom Viewport

// Request a screenshot using the object form of the format so that capture
// settings (full page, quality, viewport size) can be supplied.
const result = await app.scrape('https://example.com', {
  formats: [{
    type: 'screenshot',
    fullPage: true,
    quality: 90,
    viewport: { width: 1920, height: 1080 }
  }]
});
// NOTE(review): described here as Base64 image data — confirm the actual
// payload (it may be a URL depending on the API version).
console.log(result.screenshot); // Base64 image data

Attribute Extraction

// Extract specific attributes: each entry pairs a selector with the name of
// the attribute to read from the matching elements.
const result = await app.scrape('https://example.com', {
  formats: [{
    type: 'attributes',
    selectors: [
      { selector: 'a', attribute: 'href' },
      { selector: 'img', attribute: 'src' },
      { selector: 'meta[name="description"]', attribute: 'content' }
    ]
  }]
});
console.log(result.attributes);

Advanced Configuration

// Combine several option groups in one call: output formats, custom HTTP
// headers, browser behavior, content filtering, location and proxy.
const result = await app.scrape('https://example.com', {
  formats: ['markdown', 'screenshot'],
  headers: {
    'User-Agent': 'MyBot/1.0',
    'Accept-Language': 'en-US,en;q=0.9'
  },
  mobile: true,
  // NOTE(review): unit presumably milliseconds — confirm.
  waitFor: 2000,
  includeTags: ['article', 'main'],
  excludeTags: ['nav', 'footer', 'aside'],
  onlyMainContent: true,
  blockAds: true,
  location: {
    country: 'US',
    languages: ['en']
  },
  proxy: 'stealth'
});