or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

css-selection.md dom-manipulation.md form-handling.md html-sanitization.md http-connection.md index.md parsing.md

docs/http-connection.md

0

# HTTP Connection

1

2

HTTP client functionality for fetching web pages with full configuration control including headers, cookies, timeouts, and session management. jsoup provides a comprehensive HTTP client designed specifically for web scraping and HTML processing.

3

4

## Capabilities

5

6

### Connection Creation

7

8

Create HTTP connections for fetching and parsing web content.

9

10

```java { .api }

11

/**

12

* Create a new HTTP connection to the specified URL.

13

* @param url URL to connect to (must be http or https)

14

* @return Connection instance for configuration and execution

15

*/

16

public static Connection connect(String url);

17

18

/**

19

* Create a new HTTP session for maintaining connection settings across requests.

20

* @return Connection instance configured as a session

21

*/

22

public static Connection newSession();

23

```

24

25

**Usage Examples:**

26

27

```java

28

import org.jsoup.Jsoup;

29

import org.jsoup.Connection;

30

import org.jsoup.nodes.Document;

31

32

// Single request connection

33

Connection conn = Jsoup.connect("https://example.com");

34

35

// Session for multiple requests

36

Connection session = Jsoup.newSession()

37

.timeout(30000)

38

.userAgent("MyBot 1.0");

39

```

40

41

### URL Configuration

42

43

Configure the target URL and related settings.

44

45

```java { .api }

46

/**

47

* Set the request URL.

48

* @param url target URL as string

49

* @return this Connection for chaining

50

*/

51

public Connection url(String url);

52

53

/**

54

* Set the request URL.

55

* @param url target URL object

56

* @return this Connection for chaining

57

*/

58

public Connection url(URL url);

59

60

/**

61

* Set the referrer header.

62

* @param referrer referrer URL

63

* @return this Connection for chaining

64

*/

65

public Connection referrer(String referrer);

66

67

/**

68

* Set whether to follow HTTP redirects.

69

* @param followRedirects true to follow redirects (default: true)

70

* @return this Connection for chaining

71

*/

72

public Connection followRedirects(boolean followRedirects);

73

```

74

75

**Usage Examples:**

76

77

```java

78

Connection conn = Jsoup.connect("https://example.com")

79

.referrer("https://google.com")

80

.followRedirects(true);

81

82

// Change URL dynamically

83

conn.url("https://different-site.com/page");

84

```

85

86

### Request Configuration

87

88

Configure HTTP method, headers, and connection behavior.

89

90

```java { .api }

91

/**

92

* Set the HTTP request method.

93

* @param method HTTP method (GET, POST, PUT, PATCH, DELETE, HEAD, OPTIONS, TRACE)

94

* @return this Connection for chaining

95

*/

96

public Connection method(Connection.Method method);

97

98

/**

99

* Set the User-Agent header.

100

* @param userAgent user agent string

101

* @return this Connection for chaining

102

*/

103

public Connection userAgent(String userAgent);

104

105

/**

106

* Set connection and read timeout.

107

* @param millis timeout in milliseconds (0 = infinite)

108

* @return this Connection for chaining

109

*/

110

public Connection timeout(int millis);

111

112

/**

113

* Set maximum response body size.

114

* @param bytes maximum body size in bytes (0 = unlimited)

115

* @return this Connection for chaining

116

*/

117

public Connection maxBodySize(int bytes);

118

119

/**

120

* Set whether to ignore HTTP error status codes.

121

* @param ignoreHttpErrors true to ignore errors (default: false)

122

* @return this Connection for chaining

123

*/

124

public Connection ignoreHttpErrors(boolean ignoreHttpErrors);

125

126

/**

127

* Set whether to ignore unsupported content types.

128

* @param ignoreContentType true to ignore content type checks (default: false)

129

* @return this Connection for chaining

130

*/

131

public Connection ignoreContentType(boolean ignoreContentType);

132

133

/**

134

* Set whether to validate TLS certificates.

135

* @param value true to validate certificates (default: true)

136

* @return this Connection for chaining

137

*/

138

public Connection validateTLSCertificates(boolean value);

139

```

140

141

**Usage Examples:**

142

143

```java

144

Connection conn = Jsoup.connect("https://api.example.com")

145

.method(Connection.Method.POST)

146

.userAgent("Mozilla/5.0 (compatible; MyBot/1.0)")

147

.timeout(10000) // 10 second timeout

148

.maxBodySize(1024 * 1024) // 1MB max response

149

.ignoreHttpErrors(true)

150

.ignoreContentType(true);

151

```

152

153

### Headers and Cookies

154

155

Configure HTTP headers and cookies for requests.

156

157

```java { .api }

158

/**

159

* Set a request header.

160

* @param name header name

161

* @param value header value

162

* @return this Connection for chaining

163

*/

164

public Connection header(String name, String value);

165

166

/**

167

* Set multiple request headers.

168

* @param headers Map of header names to values

169

* @return this Connection for chaining

170

*/

171

public Connection headers(Map<String, String> headers);

172

173

/**

174

* Set a cookie.

175

* @param name cookie name

176

* @param value cookie value

177

* @return this Connection for chaining

178

*/

179

public Connection cookie(String name, String value);

180

181

/**

182

* Set multiple cookies.

183

* @param cookies Map of cookie names to values

184

* @return this Connection for chaining

185

*/

186

public Connection cookies(Map<String, String> cookies);

187

```

188

189

**Usage Examples:**

190

191

```java

192

Connection conn = Jsoup.connect("https://example.com")

193

.header("Accept", "text/html,application/xhtml+xml")

194

.header("Accept-Language", "en-US,en;q=0.5")

195

.cookie("session", "abc123")

196

.cookie("preferences", "theme=dark");

197

198

// Set multiple at once

199

Map<String, String> headers = new HashMap<>();

200

headers.put("Authorization", "Bearer token123");

201

headers.put("Content-Type", "application/json");

202

conn.headers(headers);

203

```

204

205

### Form Data and Request Body

206

207

Send form data and request payloads.

208

209

```java { .api }

210

/**

211

* Add form data parameter.

212

* @param key parameter name

213

* @param value parameter value

214

* @return this Connection for chaining

215

*/

216

public Connection data(String key, String value);

217

218

/**

219

* Add file upload data.

220

* @param key parameter name

221

* @param filename filename for upload

222

* @param inputStream file content stream

223

* @return this Connection for chaining

224

*/

225

public Connection data(String key, String filename, InputStream inputStream);

226

227

/**

228

* Set form data from collection.

229

* @param data Collection of Connection.KeyVal pairs

230

* @return this Connection for chaining

231

*/

232

public Connection data(Collection<Connection.KeyVal> data);

233

234

/**

235

* Set form data from map.

236

* @param data Map of parameter names to values

237

* @return this Connection for chaining

238

*/

239

public Connection data(Map<String, String> data);

240

241

/**

242

* Set the request body directly.

243

* @param body request body content

244

* @return this Connection for chaining

245

*/

246

public Connection requestBody(String body);

247

```

248

249

**Usage Examples:**

250

251

```java

252

// Form data

253

Connection conn = Jsoup.connect("https://example.com/api")

254

.method(Connection.Method.POST)

255

.data("username", "john")

256

.data("password", "secret")

257

.data("remember", "true");

258

259

// File upload

260

FileInputStream fileStream = new FileInputStream("document.pdf");

261

conn.data("file", "document.pdf", fileStream);

262

263

// Raw request body

264

conn.requestBody("{\"name\":\"John\",\"age\":30}")

265

.header("Content-Type", "application/json");

266

```

267

268

### Request Execution

269

270

Execute HTTP requests and handle responses.

271

272

```java { .api }

273

/**

274

* Execute a GET request and parse the response as HTML.

275

* @return Document containing parsed HTML

276

* @throws IOException if request fails or response cannot be parsed

277

*/

278

public Document get() throws IOException;

279

280

/**

281

* Execute a POST request and parse the response as HTML.

282

* @return Document containing parsed HTML

283

* @throws IOException if request fails or response cannot be parsed

284

*/

285

public Document post() throws IOException;

286

287

/**

288

* Execute the configured request and return the raw response.

289

* @return Connection.Response containing response data

290

* @throws IOException if request fails

291

*/

292

public Connection.Response execute() throws IOException;

293

```

294

295

**Usage Examples:**

296

297

```java

298

// GET request

299

Document doc = Jsoup.connect("https://example.com")

300

.userAgent("Mozilla/5.0")

301

.get();

302

303

// POST request

304

Document result = Jsoup.connect("https://example.com/search")

305

.data("q", "jsoup")

306

.post();

307

308

// Get raw response

309

Connection.Response response = Jsoup.connect("https://api.example.com")

310

.ignoreContentType(true)

311

.execute();

312

313

String responseBody = response.body();

314

int statusCode = response.statusCode();

315

```

316

317

### Session Management

318

319

Use sessions to maintain cookies and settings across multiple requests.

320

321

```java { .api }

322

/**

323

* Create a new request using this connection's session settings.

324

* @return new Connection that shares this session's settings and cookies

325

*/

326

public Connection newRequest();

327

328

/**

329

* Get the current request configuration.

330

* @return Connection.Request object

331

*/

332

public Connection.Request request();

333

334

/**

335

* Set the request configuration.

336

* @param request Connection.Request object

337

* @return this Connection for chaining

338

*/

339

public Connection request(Connection.Request request);

340

341

/**

342

* Get the response from the last executed request.

343

* @return Connection.Response object (throws IllegalArgumentException if called before a request has been executed)

344

*/

345

public Connection.Response response();

346

347

/**

348

* Set the response object.

349

* @param response Connection.Response object

350

* @return this Connection for chaining

351

*/

352

public Connection response(Connection.Response response);

353

```

354

355

**Usage Examples:**

356

357

```java

358

// Create session with common settings

359

Connection session = Jsoup.newSession()

360

.timeout(30000)

361

.userAgent("MyBot/1.0")

362

.cookie("auth", "token123");

363

364

// Make multiple requests with shared session

365

Document page1 = session.newRequest()

366

.url("https://example.com/page1")

367

.get();

368

369

Document page2 = session.newRequest()

370

.url("https://example.com/page2")

371

.get();

372

373

// Session maintains cookies automatically

374

Connection.Response loginResponse = session.newRequest()

375

.url("https://example.com/login")

376

.data("username", "user")

377

.data("password", "pass")

378

.method(Connection.Method.POST)

379

.execute();

380

381

// Subsequent requests include login cookies

382

Document protectedPage = session.newRequest()

383

.url("https://example.com/protected")

384

.get();

385

```

386

387

### Response Handling

388

389

Work with HTTP response data and metadata.

390

391

```java { .api }

392

// Response interface methods

393

public interface Response {

394

/** Get response status code */

395

int statusCode();

396

397

/** Get response status message */

398

String statusMessage();

399

400

/** Get response body as string */

401

String body();

402

403

/** Get response body as bytes */

404

byte[] bodyAsBytes();

405

406

/** Get response headers */

407

Map<String, String> headers();

408

409

/** Get specific response header */

410

String header(String name);

411

412

/** Get response cookies */

413

Map<String, String> cookies();

414

415

/** Get specific response cookie */

416

String cookie(String name);

417

418

/** Get response content type */

419

String contentType();

420

421

/** Get response charset */

422

String charset();

423

424

/** Parse response body as Document */

425

Document parse() throws IOException;

426

}

427

```

428

429

**Usage Examples:**

430

431

```java

432

Connection.Response response = Jsoup.connect("https://api.example.com")

433

.ignoreContentType(true)

434

.execute();

435

436

// Response metadata

437

int status = response.statusCode();

438

String contentType = response.contentType();

439

Map<String, String> headers = response.headers();

440

Map<String, String> cookies = response.cookies();

441

442

// Response content

443

String body = response.body();

444

byte[] rawBytes = response.bodyAsBytes();

445

446

// Parse as HTML if needed

447

if (contentType.contains("text/html")) {

448

Document doc = response.parse();

449

}

450

```

451

452

### Custom Parser Configuration

453

454

Configure custom parsers for response processing.

455

456

```java { .api }

457

/**

458

* Set custom parser for processing responses.

459

* @param parser Parser instance (HTML or XML)

460

* @return this Connection for chaining

461

*/

462

public Connection parser(Parser parser);

463

```

464

465

**Usage Example:**

466

467

```java

468

import org.jsoup.parser.Parser;

469

470

// Use XML parser for XML responses

471

Connection conn = Jsoup.connect("https://example.com/data.xml")

472

.parser(Parser.xmlParser());

473

474

Document xmlDoc = conn.get();

475

```

476

477

### Progress Monitoring

478

479

Monitor download progress for large responses.

480

481

```java { .api }

482

/**

483

* Set progress callback for response downloads.

484

* @param progress Progress callback interface

485

* @return this Connection for chaining

486

*/

487

public Connection onResponseProgress(Progress progress);

488

489

// Progress interface

490

public interface Progress {

491

/**

492

* Called during response download with progress information.

493

* @param bytesRead bytes downloaded so far

494

* @param totalBytes total response size (may be -1 if unknown)

495

* @param percent completion percentage (0.0 to 100.0)

496

*/

497

void onProgress(long bytesRead, long totalBytes, float percent);

498

}

499

```

500

501

**Usage Example:**

502

503

```java

504

Connection conn = Jsoup.connect("https://example.com/large-page")

505

.onResponseProgress(new Progress() {

506

@Override

507

public void onProgress(long bytesRead, long totalBytes, float percent) {

508

System.out.printf("Downloaded: %d/%d bytes (%.1f%%)\n",

509

bytesRead, totalBytes, percent);

510

}

511

});

512

513

Document doc = conn.get();

514

```

515

516

### Exception Handling

517

518

Handle specific HTTP and connection errors.

519

520

```java { .api }

521

// HTTP status exceptions

522

public class HttpStatusException extends IOException {

523

public int getStatusCode();

524

public String getUrl();

525

}

526

527

// Unsupported content type exceptions

528

public class UnsupportedMimeTypeException extends IOException {

529

public String getMimeType();

530

public String getUrl();

531

}

532

```

533

534

**Usage Example:**

535

536

```java

537

try {

538

Document doc = Jsoup.connect("https://example.com")

539

.timeout(5000)

540

.get();

541

} catch (HttpStatusException e) {

542

System.err.println("HTTP error: " + e.getStatusCode() + " for URL: " + e.getUrl());

543

} catch (UnsupportedMimeTypeException e) {

544

System.err.println("Unsupported content type: " + e.getMimeType());

545

} catch (SocketTimeoutException e) {

546

System.err.println("Request timed out");

547

} catch (IOException e) {

548

System.err.println("Connection error: " + e.getMessage());

549

}

550

```

551

552

## Connection Methods and Constants

553

554

HTTP methods supported by jsoup Connection.

555

556

```java { .api }

557

public enum Method {

558

GET, POST, PUT, PATCH, DELETE, HEAD, OPTIONS, TRACE

559

}

560

```

561

562

**Usage Example:**

563

564

```java

565

// Different HTTP methods

566

Connection conn = Jsoup.connect("https://api.example.com");

567

568

// GET (default)

569

Document getResponse = conn.method(Connection.Method.GET).get();

570

571

// POST

572

Document postResponse = conn.method(Connection.Method.POST).post();

573

574

// Other methods

575

Connection.Response response = conn.method(Connection.Method.PUT).execute();

576

```

577

578

Together, these APIs give jsoup's HTTP connection full control over request configuration, session management, and response handling for web scraping.