
# Legacy V1 API

Complete v1 API support for backward compatibility with existing implementations. The v1 API provides the original Firecrawl functionality with its own set of methods, types, and patterns.

## Capabilities

### V1 Synchronous Client

Legacy synchronous client for v1 API operations with traditional method signatures and response formats.

```python { .api }
class V1FirecrawlApp:
    """Legacy v1 synchronous client"""

    def __init__(self, api_key: str = None, api_url: str = None):
        """
        Initialize v1 client.

        Parameters:
        - api_key: str, Firecrawl API key
        - api_url: str, optional custom API URL
        """

    def scrape_url(self, url: str, params: Optional[dict] = None) -> dict:
        """
        Scrape a single URL using v1 API.

        Parameters:
        - url: str, target URL to scrape
        - params: dict, optional scraping parameters

        Returns:
        - dict: v1 scrape response format
        """

    def crawl_url(self, url: str, params: Optional[dict] = None, poll_interval: int = 2) -> dict:
        """
        Crawl a website using v1 API with automatic polling.

        Parameters:
        - url: str, target URL to crawl
        - params: dict, optional crawling parameters
        - poll_interval: int, polling interval in seconds

        Returns:
        - dict: v1 crawl response with all results
        """

    def async_crawl_url(self, url: str, params: Optional[dict] = None) -> dict:
        """
        Start async crawl job using v1 API.

        Parameters:
        - url: str, target URL to crawl
        - params: dict, optional crawling parameters

        Returns:
        - dict: v1 job response with job ID
        """

    def check_crawl_status(self, job_id: str) -> dict:
        """
        Check v1 crawl job status.

        Parameters:
        - job_id: str, job ID from async_crawl_url

        Returns:
        - dict: v1 job status response
        """

    def check_crawl_errors(self, job_id: str) -> dict:
        """
        Check v1 crawl job errors.

        Parameters:
        - job_id: str, job ID from async_crawl_url

        Returns:
        - dict: v1 job error information
        """

    def cancel_crawl(self, job_id: str) -> dict:
        """
        Cancel a v1 crawl job.

        Parameters:
        - job_id: str, job ID to cancel

        Returns:
        - dict: v1 cancellation response
        """

    def crawl_url_and_watch(self, url: str, params: Optional[dict] = None, poll_interval: int = 2) -> dict:
        """
        Crawl URL with real-time progress monitoring.

        Parameters:
        - url: str, target URL to crawl
        - params: dict, optional crawling parameters
        - poll_interval: int, polling interval in seconds

        Returns:
        - dict: v1 crawl response with progress tracking
        """
```
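
The job-control methods (`check_crawl_errors`, `cancel_crawl`) are not exercised in the usage examples below. A minimal sketch of how they compose with `async_crawl_url`, assuming the job response carries a `jobId` field as in the async example later on this page, and an `errors` key in the error payload (both assumptions):

```python
from firecrawl import V1FirecrawlApp

app = V1FirecrawlApp(api_key="your-api-key")

# Start a crawl we may need to inspect or abort
job = app.async_crawl_url("https://example.com", {"crawlerOptions": {"limit": 100}})
job_id = job['jobId']  # field name assumed, matching the async example below

# Inspect errors recorded so far; cancel if anything looks wrong
errors = app.check_crawl_errors(job_id)
if errors.get('errors'):  # 'errors' key is an assumption about the v1 payload
    print(f"Job {job_id} reported errors, cancelling")
    print(app.cancel_crawl(job_id))
```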

### V1 Asynchronous Client

Legacy asynchronous client providing async versions of all v1 operations.

```python { .api }
class AsyncV1FirecrawlApp:
    """Legacy v1 asynchronous client"""

    def __init__(self, api_key: str = None, api_url: str = None):
        """Initialize async v1 client"""

    async def scrape_url(self, url: str, params: Optional[dict] = None) -> dict:
        """Async version of v1 scrape_url"""

    async def crawl_url(self, url: str, params: Optional[dict] = None, poll_interval: int = 2) -> dict:
        """Async version of v1 crawl_url with polling"""

    async def async_crawl_url(self, url: str, params: Optional[dict] = None) -> dict:
        """Async version of v1 async_crawl_url"""

    async def check_crawl_status(self, job_id: str) -> dict:
        """Async version of v1 check_crawl_status"""
```
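
Since every operation on `AsyncV1FirecrawlApp` is a coroutine, independent scrapes can run concurrently rather than back to back. A minimal sketch using `asyncio.gather` (the URLs are placeholders):

```python
import asyncio
from firecrawl import AsyncV1FirecrawlApp

async def scrape_many(urls):
    app = AsyncV1FirecrawlApp(api_key="your-api-key")
    # Fire all scrapes at once; gather preserves input order in its results
    return await asyncio.gather(*(app.scrape_url(url) for url in urls))

results = asyncio.run(scrape_many(["https://example1.com", "https://example2.com"]))
print(f"Scraped {len(results)} pages")
```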

### V1 Extended Operations

Additional v1 operations for mapping, extraction, research, and batch processing.

```python { .api }
# V1FirecrawlApp additional methods
def map_url(self, url: str) -> dict:
    """
    Generate URL map using v1 API.

    Parameters:
    - url: str, target website URL

    Returns:
    - dict: v1 map response
    """

def extract(self, data: dict, schema: dict, prompt: Optional[str] = None) -> dict:
    """
    Extract structured data using v1 API.

    Parameters:
    - data: dict, input data for extraction
    - schema: dict, extraction schema
    - prompt: str, optional extraction prompt

    Returns:
    - dict: v1 extraction response
    """

def deep_research(self, query: str, max_articles: int = 5) -> dict:
    """
    Perform deep research using v1 API.

    Parameters:
    - query: str, research query
    - max_articles: int, maximum articles to analyze

    Returns:
    - dict: v1 research response
    """

def generate_llms_text(self, data: dict, prompt: str) -> dict:
    """
    Generate text using LLM via v1 API.

    Parameters:
    - data: dict, input data
    - prompt: str, generation prompt

    Returns:
    - dict: v1 text generation response
    """

def batch_scrape_urls(self, urls: List[str], params: Optional[dict] = None) -> dict:
    """
    Batch scrape URLs using v1 API.

    Parameters:
    - urls: List[str], list of URLs to scrape
    - params: dict, optional scraping parameters

    Returns:
    - dict: v1 batch scrape response
    """

def async_batch_scrape_urls(self, urls: List[str], params: Optional[dict] = None) -> dict:
    """
    Start async batch scrape using v1 API.

    Parameters:
    - urls: List[str], list of URLs to scrape
    - params: dict, optional scraping parameters

    Returns:
    - dict: v1 batch job response
    """

def batch_scrape_urls_and_watch(self, urls: List[str], params: Optional[dict] = None, poll_interval: int = 2) -> dict:
    """
    Batch scrape URLs with real-time progress monitoring.

    Parameters:
    - urls: List[str], list of URLs to scrape
    - params: dict, optional scraping parameters
    - poll_interval: int, polling interval in seconds

    Returns:
    - dict: v1 batch scrape response with progress tracking
    """

def check_batch_scrape_status(self, job_id: str) -> dict:
    """
    Check v1 batch scrape job status.

    Parameters:
    - job_id: str, job ID from async_batch_scrape_urls

    Returns:
    - dict: v1 batch job status response
    """

def check_batch_scrape_errors(self, job_id: str) -> dict:
    """
    Check v1 batch scrape job errors.

    Parameters:
    - job_id: str, job ID from async_batch_scrape_urls

    Returns:
    - dict: v1 batch job error information
    """
```
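
The batch example under Usage Examples uses the blocking `batch_scrape_urls`; for large batches, the job-based pair `async_batch_scrape_urls` / `check_batch_scrape_status` avoids holding a request open. A sketch, assuming the job response exposes a `jobId` field like the crawl job responses:

```python
import time
from firecrawl import V1FirecrawlApp

app = V1FirecrawlApp(api_key="your-api-key")

job = app.async_batch_scrape_urls(
    ["https://example1.com", "https://example2.com"],
    {"pageOptions": {"includeMarkdown": True}}
)
job_id = job['jobId']  # field name assumed, mirroring async_crawl_url

# Poll until the batch job settles
while True:
    status = app.check_batch_scrape_status(job_id)
    if status['status'] in ('completed', 'failed'):
        break
    time.sleep(2)

if status['status'] == 'completed':
    print(f"Batch finished with {len(status['data'])} documents")
else:
    print(app.check_batch_scrape_errors(job_id))
```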

## Usage Examples

### Basic V1 Operations

```python
from firecrawl import V1FirecrawlApp

# Initialize v1 client
app = V1FirecrawlApp(api_key="your-api-key")

# V1 scraping
result = app.scrape_url("https://example.com", {
    "pageOptions": {
        "includeHtml": True,
        "includeMarkdown": True,
        "includeRawHtml": False,
        "screenshot": False
    }
})

print(f"Content: {result['data']['content']}")
print(f"HTML: {result['data']['html']}")

# V1 crawling with polling
crawl_result = app.crawl_url("https://example.com", {
    "crawlerOptions": {
        "maxDepth": 2,
        "limit": 50
    },
    "pageOptions": {
        "includeMarkdown": True
    }
})

print(f"Crawled {len(crawl_result['data'])} pages")
```

### V1 Async Operations

```python
from firecrawl import V1FirecrawlApp
import time

app = V1FirecrawlApp(api_key="your-api-key")

# Start async crawl
job_response = app.async_crawl_url("https://example.com", {
    "crawlerOptions": {"limit": 100}
})

job_id = job_response['jobId']
print(f"Started job: {job_id}")

# Poll for completion
while True:
    status = app.check_crawl_status(job_id)
    print(f"Status: {status['status']}")

    if status['status'] == 'completed':
        print(f"Crawl completed with {len(status['data'])} pages")
        break
    elif status['status'] == 'failed':
        print("Crawl failed")
        break

    time.sleep(5)
```
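
The loop above polls until the job settles but never gives up; a bounded variant, assuming the same `'completed'`/`'failed'` status values:

```python
import time

def wait_for_crawl(app, job_id: str, timeout: float = 300.0, interval: float = 5.0) -> dict:
    """Poll a v1 crawl job until it settles or the deadline passes."""
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        status = app.check_crawl_status(job_id)
        if status['status'] in ('completed', 'failed'):
            return status
        time.sleep(interval)
    raise TimeoutError(f"Crawl {job_id} still running after {timeout}s")
```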

### V1 Data Extraction

```python
from firecrawl import V1FirecrawlApp

app = V1FirecrawlApp(api_key="your-api-key")

# First scrape the page
scrape_result = app.scrape_url("https://store.example.com/product/123")

# Extract structured data
extraction_schema = {
    "type": "object",
    "properties": {
        "product_name": {"type": "string"},
        "price": {"type": "number"},
        "availability": {"type": "string"},
        "features": {
            "type": "array",
            "items": {"type": "string"}
        }
    }
}

extracted = app.extract(
    data=scrape_result['data'],
    schema=extraction_schema,
    prompt="Extract product information from this e-commerce page"
)

print(f"Product: {extracted['data']['product_name']}")
print(f"Price: ${extracted['data']['price']}")
```

### V1 Batch Operations

```python
from firecrawl import V1FirecrawlApp

app = V1FirecrawlApp(api_key="your-api-key")

# Batch scrape multiple URLs
urls = [
    "https://example1.com",
    "https://example2.com",
    "https://example3.com"
]

batch_result = app.batch_scrape_urls(urls, {
    "pageOptions": {
        "includeMarkdown": True,
        "includeHtml": False
    }
})

for item in batch_result['data']:
    print(f"URL: {item['metadata']['sourceURL']}")
    print(f"Title: {item['metadata'].get('title', 'No title')}")
    print(f"Content length: {len(item['content'])}")
    print("---")
```

### V1 Research Operations

```python
from firecrawl import V1FirecrawlApp

app = V1FirecrawlApp(api_key="your-api-key")

# Deep research on a topic
research_result = app.deep_research(
    query="latest developments in artificial intelligence",
    max_articles=10
)

print(f"Research summary: {research_result['data']['summary']}")
print(f"Sources analyzed: {len(research_result['data']['sources'])}")

for source in research_result['data']['sources']:
    print(f"- {source['title']}: {source['url']}")

# Generate text based on data
text_result = app.generate_llms_text(
    data=research_result['data'],
    prompt="Write a brief executive summary of the AI developments"
)

print(f"Generated summary: {text_result['data']['text']}")
```

## V1 Types and Configuration

### V1 Configuration Types

```python { .api }
class V1JsonConfig:
    """V1 JSON configuration"""
    include_html: bool
    include_markdown: bool
    include_raw_html: bool
    include_links: bool

class V1ScrapeOptions:
    """V1 scraping options"""
    formats: List[str]  # ["markdown", "html", "rawHtml", "content", "links", "screenshot"]
    headers: Optional[dict]
    include_tags: Optional[List[str]]
    exclude_tags: Optional[List[str]]
    only_main_content: Optional[bool]
    wait_for: Optional[int]

class V1ChangeTrackingOptions:
    """V1 change tracking configuration"""
    include_html: bool
    xpath: Optional[str]
    css_selector: Optional[str]

class V1CrawlOptions:
    """V1 crawling options"""
    includes: Optional[List[str]]
    excludes: Optional[List[str]]
    generate_img_alt_text: Optional[bool]
    return_only_urls: Optional[bool]
    max_depth: Optional[int]
    mode: Optional[str]  # "fast", "default"
    ignore_sitemap: Optional[bool]
    limit: Optional[int]
    allow_backward_crawling: Optional[bool]
    allow_external_content_links: Optional[bool]

class V1ExtractOptions:
    """V1 extraction options"""
    mode: Optional[str]  # "llm-extraction", "llm-extraction-from-raw-html"
    extraction_prompt: Optional[str]
    extraction_schema: Optional[dict]
```
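
The typed option classes above use snake_case field names, while the request payloads in the examples on this page use camelCase keys ("maxDepth", "includeMarkdown"). A hypothetical helper sketching that mapping for crawl options; the exact key set is an assumption based on the examples above:

```python
# Hypothetical helper: build the camelCase "crawlerOptions" dict used by
# crawl_url from V1CrawlOptions-style snake_case fields.
def crawl_params(max_depth=None, limit=None, includes=None,
                 excludes=None, ignore_sitemap=None):
    options = {
        "maxDepth": max_depth,
        "limit": limit,
        "includes": includes,
        "excludes": excludes,
        "ignoreSitemap": ignore_sitemap,  # camelCase key is an assumption
    }
    # Drop unset fields so the request only carries explicit choices
    return {"crawlerOptions": {k: v for k, v in options.items() if v is not None}}

params = crawl_params(max_depth=2, limit=50)
# -> {"crawlerOptions": {"maxDepth": 2, "limit": 50}}
```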

### V1 Response Types

```python { .api }
class V1DocumentMetadata:
    """V1 document metadata"""
    title: Optional[str]
    description: Optional[str]
    language: Optional[str]
    source_url: str
    page_status_code: Optional[int]
    page_error: Optional[str]

class V1Document:
    """V1 document structure"""
    content: str
    html: Optional[str]
    raw_html: Optional[str]
    markdown: Optional[str]
    metadata: V1DocumentMetadata

class V1ScrapeResponse:
    """V1 scrape response"""
    success: bool
    data: V1Document

class V1CrawlResponse:
    """V1 crawl response"""
    success: bool
    data: List[V1Document]

class V1CrawlJobStatus:
    """V1 crawl job status"""
    status: str  # "active", "paused", "completed", "failed"
    job_id: str
    current: Optional[int]
    total: Optional[int]
    data: Optional[List[V1Document]]
    partial_data: Optional[List[V1Document]]
```
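
A sketch of reading a `V1CrawlJobStatus`-shaped response defensively, assuming the response dict keys mirror the field names above; `current`, `total`, and `partial_data` may be absent or empty while a job is active:

```python
def report_progress(status: dict) -> None:
    """Print a short summary of a v1 crawl job status dict."""
    state = status.get('status', 'unknown')
    current, total = status.get('current'), status.get('total')
    if current is not None and total:
        print(f"{state}: {current}/{total} pages")
    else:
        print(state)

    # Completed jobs carry full results; active jobs may expose partial ones
    for doc in (status.get('data') or status.get('partial_data') or [])[:3]:
        print(f"- {doc['metadata'].get('title', 'untitled')}")
```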

## Unified Client V1 Access

Access the v1 API through the unified client:

```python
from firecrawl import Firecrawl

# Main client defaults to v2
app = Firecrawl(api_key="your-api-key")

# Access v1 API via .v1 property
v1_result = app.v1.scrape_url("https://example.com")
v1_crawl = app.v1.crawl_url("https://example.com", {"crawlerOptions": {"limit": 10}})

# V1 methods are available on the .v1 proxy
print(f"V1 scrape result: {v1_result['data']['content']}")
```

## Migration from V1 to V2

### Key Differences

```python
# V1 style
from firecrawl import V1FirecrawlApp
app = V1FirecrawlApp(api_key="key")
result = app.scrape_url("https://example.com", {
    "pageOptions": {"includeMarkdown": True}
})

# V2 style
from firecrawl import Firecrawl, ScrapeOptions
app = Firecrawl(api_key="key")
result = app.scrape("https://example.com", ScrapeOptions(formats=["markdown"]))

# Unified client (recommended)
from firecrawl import Firecrawl
app = Firecrawl(api_key="key")

# Use v2 by default
v2_result = app.scrape("https://example.com")

# Use v1 when needed
v1_result = app.v1.scrape_url("https://example.com")
```

## Async V1 Usage

```python
import asyncio
from firecrawl import AsyncV1FirecrawlApp

async def v1_async_example():
    app = AsyncV1FirecrawlApp(api_key="your-api-key")

    # Async v1 scraping
    result = await app.scrape_url("https://example.com")

    # Async v1 crawling
    crawl_result = await app.crawl_url("https://example.com", {
        "crawlerOptions": {"limit": 50}
    })

    # Async v1 batch scraping
    batch_result = await app.batch_scrape_urls([
        "https://example1.com",
        "https://example2.com"
    ])

asyncio.run(v1_async_example())
```