or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

big-data.md cloud-storage.md compression.md core-operations.md index.md network-access.md utilities.md

docs/network-access.md

0

# Network and Remote Access

1

2

HTTP/HTTPS, FTP, and SSH-based file access with authentication and secure connection support. The `smart_open` library exposes these network protocols through the same `open()` interface used for local files, so remote resources are handled as ordinary file-like objects.

3

4

## Capabilities

5

6

### HTTP/HTTPS Operations

7

8

Read-only access to web resources with authentication and custom headers support.

9

10

```python { .api }

11

def open(uri, mode, kerberos=False, user=None, password=None, cert=None,

12

headers=None, timeout=None, session=None, buffer_size=256*1024):

13

"""Open HTTP/HTTPS resource for reading.

14

15

Parameters:

16

uri: str - HTTP(S) URL

17

mode: str - Must be 'rb' (read binary only)

18

kerberos: bool - Use Kerberos authentication

19

user: str - Basic authentication username

20

password: str - Basic authentication password

21

cert: str - Path to client certificate file

22

headers: dict - Additional HTTP headers

23

timeout: float - Request timeout in seconds

24

session: requests.Session - Custom requests session

25

buffer_size: int - Buffer size for reading (default: 256KB)

26

27

Returns:

28

File-like object for reading HTTP response

29

"""

30

31

def parse_uri(uri_as_string):

32

"""Parse HTTP/HTTPS URI into components.

33

34

Returns:

35

dict with parsed URI components

36

"""

37

```

38

39

### FTP/FTPS Operations

40

41

Full read/write access to FTP servers with secure connection support.

42

43

```python { .api }

44

def open(path, mode="rb", host=None, user=None, password=None, port=21,

45

secure_connection=False, transport_params=None):

46

"""Open FTP resource for reading or writing.

47

48

Parameters:

49

path: str - Remote file path

50

mode: str - File mode ('rb' or 'wb')

51

host: str - FTP server hostname

52

user: str - FTP username

53

password: str - FTP password

54

port: int - FTP server port (default: 21)

55

secure_connection: bool - Use FTPS (secure FTP)

56

transport_params: dict - Additional transport parameters

57

58

Returns:

59

File-like object for FTP operations

60

"""

61

62

def parse_uri(uri_as_string):

63

"""Parse FTP/FTPS URI into components.

64

65

Returns:

66

dict with hostname, username, password, path, port, scheme

67

"""

68

```

69

70

### SSH/SFTP Operations

71

72

Secure file access over SSH with key-based and password authentication.

73

74

```python { .api }

75

def open(path, mode="r", host=None, user=None, password=None, port=None,

76

connect_kwargs=None, prefetch_kwargs=None, buffer_size=-1):

77

"""Open SSH/SFTP resource for reading or writing.

78

79

Parameters:

80

path: str - Remote file path

81

mode: str - File mode ('r', 'w', 'rb', 'wb')

82

host: str - SSH server hostname

83

user: str - SSH username

84

password: str - SSH password (if not using key auth)

85

port: int - SSH server port (None for default)

86

connect_kwargs: dict - Additional SSH connection parameters including:

87

- pkey: paramiko.PKey - Private key object

88

- key_filename: str - Path to private key file

89

- look_for_keys: bool - Search for key files

90

- allow_agent: bool - Use SSH agent

91

- timeout: float - Connection timeout

92

- compress: bool - Enable compression

93

prefetch_kwargs: dict - SFTP prefetch parameters

94

buffer_size: int - Buffer size for I/O (-1 for default)

95

96

Returns:

97

File-like object for SSH/SFTP operations

98

"""

99

100

def parse_uri(uri_as_string):

101

"""Parse SSH/SCP/SFTP URI into components.

102

103

Returns:

104

dict with hostname, username, password, path, port, scheme

105

"""

106

```

107

108

## Usage Examples

109

110

### HTTP/HTTPS Examples

111

112

```python

113

from smart_open import open

114

115

# Simple HTTP read

116

with open('http://example.com/data.txt', 'rb') as f:

117

content = f.read()

118

119

# HTTPS with custom headers

120

transport_params = {

121

'headers': {

122

'User-Agent': 'MyApp/1.0',

123

'Authorization': 'Bearer token123'

124

},

125

'timeout': 30

126

}

127

with open('https://api.example.com/data.json', 'rb',

128

transport_params=transport_params) as f:

129

data = f.read()

130

131

# Basic authentication

132

transport_params = {

133

'user': 'username',

134

'password': 'password'

135

}

136

with open('https://secure.example.com/file.txt', 'rb',

137

transport_params=transport_params) as f:

138

content = f.read()

139

140

# Client certificate authentication

141

transport_params = {

142

'cert': '/path/to/client.pem',

143

'ca_certs': '/path/to/ca-bundle.pem'

144

}

145

with open('https://secure.example.com/api/data', 'rb',

146

transport_params=transport_params) as f:

147

response = f.read()

148

149

# Kerberos authentication (requires requests-kerberos)

150

transport_params = {'kerberos': True}

151

with open('https://intranet.company.com/file.txt', 'rb',

152

transport_params=transport_params) as f:

153

data = f.read()

154

```

155

156

### FTP Examples

157

158

```python

159

# Basic FTP access

160

with open('ftp://user:pass@ftp.example.com/path/file.txt', 'rb') as f:

161

content = f.read()

162

163

# FTP write operation

164

with open('ftp://user:pass@ftp.example.com/upload/data.txt', 'w') as f:

165

f.write('Upload this content')

166

167

# FTPS (secure FTP)

168

with open('ftps://user:pass@secure-ftp.example.com/file.txt', 'rb') as f:

169

content = f.read()

170

171

# Custom FTP port

172

with open('ftp://user:pass@ftp.example.com:2121/file.txt', 'rb') as f:

173

data = f.read()

174

175

# Direct FTP module usage

176

from smart_open.ftp import open as ftp_open

177

178

with ftp_open('/remote/path/file.txt', 'rb', host='ftp.example.com',

179

user='username', password='password', port=21) as f:

180

content = f.read()

181

```

182

183

### SSH/SFTP Examples

184

185

```python

186

# Password authentication

187

with open('ssh://user:password@server.com/path/file.txt', 'rb') as f:

188

content = f.read()

189

190

# Key-based authentication (using SSH agent or default keys)

191

with open('ssh://user@server.com/path/file.txt', 'rb') as f:

192

content = f.read()

193

194

# Explicit private key file

195

transport_params = {

196

'key_filename': '/home/user/.ssh/id_rsa'

197

}

198

with open('ssh://user@server.com/path/file.txt', 'rb',

199

transport_params=transport_params) as f:

200

content = f.read()

201

202

# Custom SSH port and connection options

203

transport_params = {

204

'port': 2222,

205

'timeout': 10,

206

'compress': True,

207

'look_for_keys': True

208

}

209

with open('ssh://user@server.com/path/file.txt', 'rb',

210

transport_params=transport_params) as f:

211

content = f.read()

212

213

# SFTP write operation

214

with open('sftp://user@server.com/upload/data.txt', 'w') as f:

215

f.write('Remote file content')

216

217

# SCP-style URLs (same as SSH/SFTP)

218

with open('scp://user@server.com/path/file.txt', 'rb') as f:

219

content = f.read()

220

221

# Direct SSH module usage

222

from smart_open.ssh import open as ssh_open

223

224

with ssh_open('/remote/path/file.txt', 'rb', host='server.com',

225

user='username', password='password', port=22) as f:

226

content = f.read()

227

```

228

229

## Authentication Methods

230

231

### HTTP Authentication

232

233

```python

234

# Basic authentication

235

transport_params = {

236

'user': 'username',

237

'password': 'password'

238

}

239

240

# Bearer token

241

transport_params = {

242

'headers': {'Authorization': 'Bearer your-token'}

243

}

244

245

# API key header

246

transport_params = {

247

'headers': {'X-API-Key': 'your-api-key'}

248

}

249

250

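# Note: user/password are sent as HTTP Basic credentials; Digest is NOT negotiated automatically (for Digest, supply a requests.Session whose auth is requests.auth.HTTPDigestAuth via 'session')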

251

transport_params = {

252

'user': 'username',

253

'password': 'password'

254

}

255

256

# Custom authentication header

257

transport_params = {

258

'headers': {'Authorization': 'Custom your-auth-string'}

259

}

260

```

261

262

### SSH Key Management

263

264

```python

265

# Using specific private key

266

transport_params = {

267

'key_filename': '/path/to/private_key'

268

}

269

270

# Using multiple key files

271

transport_params = {

272

'key_filename': ['/path/to/key1', '/path/to/key2']

273

}

274

275

# Using paramiko PKey object

276

import paramiko

277

private_key = paramiko.RSAKey.from_private_key_file('/path/to/key')

278

transport_params = {

279

'pkey': private_key

280

}

281

282

# Disable automatic key search

283

transport_params = {

284

'look_for_keys': False,

285

'allow_agent': False,

286

'key_filename': '/specific/key/only'

287

}

288

```

289

290

## Security Considerations

291

292

### HTTPS Certificate Verification

293

294

```python

295

# Default behavior: verify certificates

296

with open('https://secure.example.com/file.txt', 'rb') as f:

297

content = f.read()

298

299

# Custom CA certificates

300

transport_params = {

301

'ca_certs': '/path/to/custom-ca-bundle.pem'

302

}

303

304

# Client certificate authentication

305

transport_params = {

306

'cert': '/path/to/client-cert.pem' # Can include key

307

}

308

309

# Separate cert and key files

310

transport_params = {

311

'cert': ('/path/to/client-cert.pem', '/path/to/client-key.pem')

312

}

313

```

314

315

### SSH Security

316

317

```python

318

# Reject connections to hosts whose key is not already known

319

transport_params = {

320

'host_key_policy': paramiko.RejectPolicy() # Reject unknown hosts

321

}

322

323

# Custom host key verification

324

import paramiko

325

326

class CustomHostKeyPolicy(paramiko.MissingHostKeyPolicy):

327

def missing_host_key(self, client, hostname, key):

328

# Custom host key verification logic

329

pass

330

331

transport_params = {

332

'host_key_policy': CustomHostKeyPolicy()

333

}

334

335

# Connection, banner, and authentication timeouts (in seconds)

336

transport_params = {

337

'timeout': 10,

338

'banner_timeout': 30,

339

'auth_timeout': 30

340

}

341

```

342

343

## Error Handling

344

345

### HTTP Error Handling

346

347

```python

348

import requests

349

from smart_open import open

350

351

try:

352

with open('https://api.example.com/data.json', 'rb') as f:

353

data = f.read()

354

except requests.exceptions.HTTPError as e:

355

status_code = e.response.status_code

356

if status_code == 404:

357

print("Resource not found")

358

elif status_code == 401:

359

print("Authentication required")

360

elif status_code == 403:

361

print("Access forbidden")

362

else:

363

print(f"HTTP error: {status_code}")

364

except requests.exceptions.ConnectionError:

365

print("Connection failed")

366

except requests.exceptions.Timeout:

367

print("Request timed out")

368

```

369

370

### SSH Error Handling

371

372

```python

373

import paramiko

374

from smart_open import open

375

376

try:

377

with open('ssh://user@server.com/file.txt', 'rb') as f:

378

content = f.read()

379

except paramiko.AuthenticationException:

380

print("SSH authentication failed")

381

except paramiko.SSHException as e:

382

print(f"SSH connection error: {e}")

383

except FileNotFoundError:

384

print("Remote file not found")

385

except PermissionError:

386

print("Permission denied")

387

```

388

389

### FTP Error Handling

390

391

```python

392

import ftplib

393

from smart_open import open

394

395

try:

396

with open('ftp://user:pass@server.com/file.txt', 'rb') as f:

397

content = f.read()

398

except ftplib.error_perm as e:

399

error_code = str(e)[:3]

400

if error_code == '530':

401

print("FTP authentication failed")

402

elif error_code == '550':

403

print("File not found or no permission")

404

else:

405

print(f"FTP permission error: {e}")

406

except ftplib.error_temp as e:

407

print(f"Temporary FTP error: {e}")

408

except ConnectionError:

409

print("FTP connection failed")

410

```

411

412

## Performance and Reliability

413

414

### HTTP Performance

415

416

```python

417

# Connection pooling and keep-alive

418

import requests

419

session = requests.Session()

420

adapter = requests.adapters.HTTPAdapter(

421

pool_connections=10,

422

pool_maxsize=20,

423

max_retries=3

424

)

425

session.mount('http://', adapter)

426

session.mount('https://', adapter)

427

428

transport_params = {'session': session}

429

430

# Streaming large files

431

with open('https://example.com/large-file.dat', 'rb',

432

transport_params={'stream': True}) as f:

433

for chunk in iter(lambda: f.read(8192), b''):

434

process_chunk(chunk)

435

```

436

437

### SSH Connection Reuse

438

439

```python

440

import paramiko

441

442

# Reuse SSH client for multiple files

443

client = paramiko.SSHClient()

444

client.set_missing_host_key_policy(paramiko.AutoAddPolicy())

445

client.connect('server.com', username='user', password='pass')

446

447

transport_params = {'client': client}

448

449

# Use same client for multiple operations

450

with open('ssh://server.com/file1.txt', 'rb',

451

transport_params=transport_params) as f:

452

content1 = f.read()

453

454

with open('ssh://server.com/file2.txt', 'rb',

455

transport_params=transport_params) as f:

456

content2 = f.read()

457

458

client.close()

459

```