0
# Network and Remote Access
1
2
HTTP/HTTPS, FTP, and SSH-based file access with authentication and secure connection support. The `smart_open` library provides seamless integration with these network protocols for remote file operations.
3
4
## Capabilities
5
6
### HTTP/HTTPS Operations
7
8
Read-only access to web resources with authentication and custom headers support.
9
10
```python { .api }
11
def open(uri, mode, kerberos=False, user=None, password=None, cert=None,
12
headers=None, timeout=None, session=None, buffer_size=256*1024):
13
"""Open HTTP/HTTPS resource for reading.
14
15
Parameters:
16
uri: str - HTTP(S) URL
17
mode: str - Must be 'rb' (read binary only)
18
kerberos: bool - Use Kerberos authentication
19
user: str - Basic authentication username
20
password: str - Basic authentication password
21
cert: str - Path to client certificate file
22
headers: dict - Additional HTTP headers
23
timeout: float - Request timeout in seconds
24
session: requests.Session - Custom requests session
25
buffer_size: int - Buffer size for reading (default: 256KB)
26
27
Returns:
28
File-like object for reading HTTP response
29
"""
30
31
def parse_uri(uri_as_string):
32
"""Parse HTTP/HTTPS URI into components.
33
34
Returns:
35
dict with parsed URI components
36
"""
37
```
38
39
### FTP/FTPS Operations
40
41
Full read/write access to FTP servers with secure connection support.
42
43
```python { .api }
44
def open(path, mode="rb", host=None, user=None, password=None, port=21,
45
secure_connection=False, transport_params=None):
46
"""Open FTP resource for reading or writing.
47
48
Parameters:
49
path: str - Remote file path
50
mode: str - File mode ('rb' or 'wb')
51
host: str - FTP server hostname
52
user: str - FTP username
53
password: str - FTP password
54
port: int - FTP server port (default: 21)
55
secure_connection: bool - Use FTPS (secure FTP)
56
transport_params: dict - Additional transport parameters
57
58
Returns:
59
File-like object for FTP operations
60
"""
61
62
def parse_uri(uri_as_string):
63
"""Parse FTP/FTPS URI into components.
64
65
Returns:
66
dict with hostname, username, password, path, port, scheme
67
"""
68
```
69
70
### SSH/SFTP Operations
71
72
Secure file access over SSH with key-based and password authentication.
73
74
```python { .api }
75
def open(path, mode="r", host=None, user=None, password=None, port=None,
76
connect_kwargs=None, prefetch_kwargs=None, buffer_size=-1):
77
"""Open SSH/SFTP resource for reading or writing.
78
79
Parameters:
80
path: str - Remote file path
81
mode: str - File mode ('r', 'w', 'rb', 'wb')
82
host: str - SSH server hostname
83
user: str - SSH username
84
password: str - SSH password (if not using key auth)
85
port: int - SSH server port (None for default)
86
connect_kwargs: dict - Additional SSH connection parameters including:
87
- pkey: paramiko.PKey - Private key object
88
- key_filename: str - Path to private key file
89
- look_for_keys: bool - Search for key files
90
- allow_agent: bool - Use SSH agent
91
- timeout: float - Connection timeout
92
- compress: bool - Enable compression
93
prefetch_kwargs: dict - SFTP prefetch parameters
94
buffer_size: int - Buffer size for I/O (-1 for default)
95
96
Returns:
97
File-like object for SSH/SFTP operations
98
"""
99
100
def parse_uri(uri_as_string):
101
"""Parse SSH/SCP/SFTP URI into components.
102
103
Returns:
104
dict with hostname, username, password, path, port, scheme
105
"""
106
```
107
108
## Usage Examples
109
110
### HTTP/HTTPS Examples
111
112
```python
113
from smart_open import open
114
115
# Simple HTTP read
116
with open('http://example.com/data.txt', 'rb') as f:
117
content = f.read()
118
119
# HTTPS with custom headers
120
transport_params = {
121
'headers': {
122
'User-Agent': 'MyApp/1.0',
123
'Authorization': 'Bearer token123'
124
},
125
'timeout': 30
126
}
127
with open('https://api.example.com/data.json', 'rb',
128
transport_params=transport_params) as f:
129
data = f.read()
130
131
# Basic authentication
132
transport_params = {
133
'user': 'username',
134
'password': 'password'
135
}
136
with open('https://secure.example.com/file.txt', 'rb',
137
transport_params=transport_params) as f:
138
content = f.read()
139
140
# Client certificate authentication
141
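import requests
142
session = requests.Session()
143
session.verify = '/path/to/ca-bundle.pem'  # custom CA bundle is set on the session
144
transport_params = {'cert': '/path/to/client.pem', 'session': session}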
145
with open('https://secure.example.com/api/data', 'rb',
146
transport_params=transport_params) as f:
147
response = f.read()
148
149
# Kerberos authentication (requires requests-kerberos)
150
transport_params = {'kerberos': True}
151
with open('https://intranet.company.com/file.txt', 'rb',
152
transport_params=transport_params) as f:
153
data = f.read()
154
```
155
156
### FTP Examples
157
158
```python
159
# Basic FTP access
160
with open('ftp://user:pass@ftp.example.com/path/file.txt', 'rb') as f:
161
content = f.read()
162
163
# FTP write operation
164
with open('ftp://user:pass@ftp.example.com/upload/data.txt', 'w') as f:
165
f.write('Upload this content')
166
167
# FTPS (secure FTP)
168
with open('ftps://user:pass@secure-ftp.example.com/file.txt', 'rb') as f:
169
content = f.read()
170
171
# Custom FTP port
172
with open('ftp://user:pass@ftp.example.com:2121/file.txt', 'rb') as f:
173
data = f.read()
174
175
# Direct FTP module usage
176
from smart_open.ftp import open as ftp_open
177
178
with ftp_open('/remote/path/file.txt', 'rb', host='ftp.example.com',
179
user='username', password='password', port=21) as f:
180
content = f.read()
181
```
182
183
### SSH/SFTP Examples
184
185
```python
186
# Password authentication
187
with open('ssh://user:password@server.com/path/file.txt', 'rb') as f:
188
content = f.read()
189
190
# Key-based authentication (using SSH agent or default keys)
191
with open('ssh://user@server.com/path/file.txt', 'rb') as f:
192
content = f.read()
193
194
# Explicit private key file
195
transport_params = {
196
'connect_kwargs': {'key_filename': '/home/user/.ssh/id_rsa'}
197
}
198
with open('ssh://user@server.com/path/file.txt', 'rb',
199
transport_params=transport_params) as f:
200
content = f.read()
201
202
# Custom SSH port and connection options
203
transport_params = {
204
'port': 2222,
205
'connect_kwargs': {'timeout': 10,
206
'compress': True,
207
'look_for_keys': True}
208
}
209
with open('ssh://user@server.com/path/file.txt', 'rb',
210
transport_params=transport_params) as f:
211
content = f.read()
212
213
# SFTP write operation
214
with open('sftp://user@server.com/upload/data.txt', 'w') as f:
215
f.write('Remote file content')
216
217
# SCP-style URLs (same as SSH/SFTP)
218
with open('scp://user@server.com/path/file.txt', 'rb') as f:
219
content = f.read()
220
221
# Direct SSH module usage
222
from smart_open.ssh import open as ssh_open
223
224
with ssh_open('/remote/path/file.txt', 'rb', host='server.com',
225
user='username', password='password', port=22) as f:
226
content = f.read()
227
```
228
229
## Authentication Methods
230
231
### HTTP Authentication
232
233
```python
234
# Basic authentication
235
transport_params = {
236
'user': 'username',
237
'password': 'password'
238
}
239
240
# Bearer token
241
transport_params = {
242
'headers': {'Authorization': 'Bearer your-token'}
243
}
244
245
# API key header
246
transport_params = {
247
'headers': {'X-API-Key': 'your-api-key'}
248
}
249
250
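# Digest authentication (not automatic: 'user'/'password' send Basic credentials)
251
import requests
252
session = requests.Session()
253
session.auth = requests.auth.HTTPDigestAuth('username', 'password')
254
transport_params = {'session': session}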
255
256
# Custom authentication header
257
transport_params = {
258
'headers': {'Authorization': 'Custom your-auth-string'}
259
}
260
```
261
262
### SSH Key Management
263
264
```python
265
# Using specific private key
266
transport_params = {
267
'connect_kwargs': {'key_filename': '/path/to/private_key'}
268
}
269
270
# Using multiple key files
271
transport_params = {
272
'connect_kwargs': {'key_filename': ['/path/to/key1', '/path/to/key2']}
273
}
274
275
# Using paramiko PKey object
276
import paramiko
277
private_key = paramiko.RSAKey.from_private_key_file('/path/to/key')
278
transport_params = {
279
'connect_kwargs': {'pkey': private_key}
280
}
281
282
# Disable automatic key search
283
transport_params = {
284
'connect_kwargs': {'look_for_keys': False,
285
'allow_agent': False,
286
'key_filename': '/specific/key/only'}
287
}
288
```
289
290
## Security Considerations
291
292
### HTTPS Certificate Verification
293
294
```python
295
# Default behavior: verify certificates
296
with open('https://secure.example.com/file.txt', 'rb') as f:
297
content = f.read()
298
299
# Custom CA certificates
300
session = requests.Session()  # requires `import requests`
301
session.verify = '/path/to/custom-ca-bundle.pem'
302
transport_params = {'session': session}
303
304
# Client certificate authentication
305
transport_params = {
306
'cert': '/path/to/client-cert.pem' # Can include key
307
}
308
309
# Separate cert and key files
310
transport_params = {
311
'cert': ('/path/to/client-cert.pem', '/path/to/client-key.pem')
312
}
313
```
314
315
### SSH Security
316
317
```python
318
# Restrict to specific host key
319
transport_params = {
320
'host_key_policy': paramiko.RejectPolicy() # Reject unknown hosts
321
}
322
323
# Custom host key verification
324
import paramiko
325
326
class CustomHostKeyPolicy(paramiko.MissingHostKeyPolicy):
327
def missing_host_key(self, client, hostname, key):
328
# Custom host key verification logic
329
pass
330
331
transport_params = {
332
'host_key_policy': CustomHostKeyPolicy()
333
}
334
335
# Connection timeouts (passed to paramiko via connect_kwargs)
336
transport_params = {
337
'connect_kwargs': {'timeout': 10,
338
'banner_timeout': 30,
339
'auth_timeout': 30}
340
}
341
```
342
343
## Error Handling
344
345
### HTTP Error Handling
346
347
```python
348
import requests
349
from smart_open import open
350
351
try:
352
with open('https://api.example.com/data.json', 'rb') as f:
353
data = f.read()
354
except requests.exceptions.HTTPError as e:
355
status_code = e.response.status_code
356
if status_code == 404:
357
print("Resource not found")
358
elif status_code == 401:
359
print("Authentication required")
360
elif status_code == 403:
361
print("Access forbidden")
362
else:
363
print(f"HTTP error: {status_code}")
364
except requests.exceptions.ConnectionError:
365
print("Connection failed")
366
except requests.exceptions.Timeout:
367
print("Request timed out")
368
```
369
370
### SSH Error Handling
371
372
```python
373
import paramiko
374
from smart_open import open
375
376
try:
377
with open('ssh://user@server.com/file.txt', 'rb') as f:
378
content = f.read()
379
except paramiko.AuthenticationException:
380
print("SSH authentication failed")
381
except paramiko.SSHException as e:
382
print(f"SSH connection error: {e}")
383
except FileNotFoundError:
384
print("Remote file not found")
385
except PermissionError:
386
print("Permission denied")
387
```
388
389
### FTP Error Handling
390
391
```python
392
import ftplib
393
from smart_open import open
394
395
try:
396
with open('ftp://user:pass@server.com/file.txt', 'rb') as f:
397
content = f.read()
398
except ftplib.error_perm as e:
399
error_code = str(e)[:3]
400
if error_code == '530':
401
print("FTP authentication failed")
402
elif error_code == '550':
403
print("File not found or no permission")
404
else:
405
print(f"FTP permission error: {e}")
406
except ftplib.error_temp as e:
407
print(f"Temporary FTP error: {e}")
408
except ConnectionError:
409
print("FTP connection failed")
410
```
411
412
## Performance and Reliability
413
414
### HTTP Performance
415
416
```python
417
# Connection pooling and keep-alive
418
import requests
419
session = requests.Session()
420
adapter = requests.adapters.HTTPAdapter(
421
pool_connections=10,
422
pool_maxsize=20,
423
max_retries=3
424
)
425
session.mount('http://', adapter)
426
session.mount('https://', adapter)
427
428
transport_params = {'session': session}
429
430
# Streaming large files
431
with open('https://example.com/large-file.dat', 'rb',
432
transport_params={'buffer_size': 1024 * 1024}) as f:
433
for chunk in iter(lambda: f.read(8192), b''):
434
process_chunk(chunk)
435
```
436
437
### SSH Connection Reuse
438
439
```python
440
import paramiko
441
442
# Reuse SSH client for multiple files
443
client = paramiko.SSHClient()
444
client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
445
client.connect('server.com', username='user', password='pass')
446
447
transport_params = {'client': client}
448
449
# Use same client for multiple operations
450
with open('ssh://server.com/file1.txt', 'rb',
451
transport_params=transport_params) as f:
452
content1 = f.read()
453
454
with open('ssh://server.com/file2.txt', 'rb',
455
transport_params=transport_params) as f:
456
content2 = f.read()
457
458
client.close()
459
```