# Core File Operations

Universal file operations that work across all supported storage systems with transparent compression support. These functions provide the primary interface for smart-open and serve as drop-in replacements for Python's built-in file operations.

## Capabilities

### Universal File Opening

The main entry point for accessing files across all supported storage systems.

```python { .api }
def open(uri, mode='r', buffering=-1, encoding=None, errors=None, newline=None,
         closefd=True, opener=None, compression='infer_from_extension',
         transport_params=None):
    """Open URI object, returning a file-like object.

    Parameters:
        uri: str, pathlib.Path, or file-like object - The resource to open
        mode: str - File access mode ('r', 'w', 'rb', 'wb', etc.)
        buffering: int - Buffer size (-1 for system default)
        encoding: str - Text encoding for text mode
        errors: str - Error handling strategy for text mode
        newline: str - Newline handling for text mode
        closefd: bool - Close file descriptor (ignored, for compatibility)
        opener: callable - Custom opener (ignored, for compatibility)
        compression: str - Compression type or 'infer_from_extension'
        transport_params: dict - Transport-specific parameters

    Returns:
        File-like object implementing standard Python I/O interface
    """
```

### URI Parsing

Parse and analyze URI components for different storage systems.

```python { .api }
def parse_uri(uri_as_string):
    """Parse URI string into transport-specific components.

    Parameters:
        uri_as_string: str - The URI to parse

    Returns:
        collections.namedtuple - Parsed URI with transport-specific fields
    """
```

### Legacy Compatibility

Deprecated functions maintained for backward compatibility.

```python { .api }
def smart_open(uri, mode='rb', buffering=-1, encoding=None, errors=None,
               newline=None, closefd=True, opener=None, ignore_extension=False,
               **kwargs):
    """Legacy function - use open() instead.

    Parameters:
        uri: str - The resource to open
        mode: str - File access mode (default: 'rb')
        buffering: int - Buffer size
        encoding: str - Text encoding
        errors: str - Error handling strategy
        newline: str - Newline handling
        closefd: bool - Close file descriptor (ignored)
        opener: callable - Custom opener (ignored)
        ignore_extension: bool - If True, disables compression; if False, infers from extension
        **kwargs: Transport parameters (deprecated - raises DeprecationWarning)

    Deprecated: This function raises DeprecationWarning and is maintained only for
    backward compatibility. Main differences from open():
    - Default mode is 'rb' instead of 'r'
    - Uses ignore_extension parameter instead of compression parameter
    - Transport parameters passed as **kwargs (now deprecated)

    Use smart_open.open() for new code.
    """

def s3_iter_bucket(bucket_name, prefix='', accept_key=None, key_limit=None,
                   workers=16, retries=3, **session_kwargs):
    """Deprecated - use smart_open.s3.iter_bucket instead.

    Iterate over S3 bucket contents in parallel.
    """
```

## Usage Examples

### Basic File Operations

```python
from smart_open import open

# Read text file
with open('s3://my-bucket/data.txt') as f:
    content = f.read()

# Read binary file
with open('gs://my-bucket/image.jpg', 'rb') as f:
    binary_data = f.read()

# Write text file
with open('azure://container/output.txt', 'w') as f:
    f.write('Hello, world!')

# Write binary file
with open('s3://bucket/data.bin', 'wb') as f:
    f.write(b'binary data')
```

### Compressed Files

```python
# Automatic compression detection from extension
with open('s3://bucket/data.txt.gz') as f:
    uncompressed_text = f.read()

# Explicit compression specification
with open('s3://bucket/data.txt', compression='gzip') as f:
    text = f.read()

# Writing compressed files
with open('gs://bucket/output.txt.bz2', 'w') as f:
    f.write('This will be compressed automatically')
```

### Advanced Options

```python
# Custom encoding and error handling
with open('s3://bucket/text.txt', encoding='utf-8', errors='ignore') as f:
    text = f.read()

# Custom buffer size
with open('gs://bucket/large-file.dat', 'rb', buffering=1024*1024) as f:
    chunk = f.read(4096)

# Transport-specific parameters
transport_params = {
    'client_kwargs': {'region_name': 'us-west-2'},
    'buffer_size': 1024*1024
}
with open('s3://bucket/file.txt', transport_params=transport_params) as f:
    data = f.read()
```

### Seeking and Random Access

```python
# Random access to remote files
with open('s3://bucket/data.bin', 'rb') as f:
    f.seek(1000)         # Seek to byte 1000
    chunk = f.read(100)  # Read 100 bytes

    f.seek(0, 2)         # Seek to end
    file_size = f.tell()

    f.seek(-100, 2)      # Seek to 100 bytes from end
    tail = f.read()
```

### URI Parsing Examples

```python
from smart_open import parse_uri

# Parse S3 URI
parsed = parse_uri('s3://my-bucket/path/to/file.txt')
print(parsed.scheme)     # 's3'
print(parsed.bucket_id)  # 'my-bucket'
print(parsed.key_id)     # 'path/to/file.txt'

# Parse HTTP URI
parsed = parse_uri('https://example.com/data.json')
print(parsed.scheme)     # 'https'
print(parsed.uri_path)   # '/data.json'

# Parse local file URI
parsed = parse_uri('file:///home/user/data.txt')
print(parsed.scheme)     # 'file'
print(parsed.uri_path)   # '/home/user/data.txt'
```

## Error Handling

Smart-open provides consistent error handling across all transport layers:

```python
from smart_open import open
import boto3

try:
    with open('s3://bucket/nonexistent.txt') as f:
        content = f.read()
except FileNotFoundError:
    print("File not found")
except PermissionError:
    print("Access denied")
except Exception as e:
    print(f"Other error: {e}")
```

## Performance Considerations

- Use binary mode (`'rb'`, `'wb'`) for better performance with large files
- Adjust `buffering` parameter for optimal I/O performance
- Consider transport-specific parameters for fine-tuning
- Use streaming operations (`for line in file`) for memory efficiency
- Enable multipart uploads for large S3 writes via transport_params