or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

dict-operations.md, download-caching.md, function-utilities.md, hashing-imports.md, index.md, list-operations.md, path-operations.md, progress-timing.md, system-integration.md, text-processing.md

hashing-imports.mddocs/

0

# Hashing and Import Utilities

1

2

Hash arbitrary data and files, plus dynamic module importing and path resolution utilities for data integrity and module management.

3

4

## Capabilities

5

6

### Data and File Hashing

7

8

Functions for computing secure hashes of arbitrary Python data structures and files.

9

10

```python { .api }

11

def hash_data(data, hasher=NoParam, base=NoParam, types=False, convert=False, extensions=None):

12

"""

13

Hash arbitrary Python data structures.

14

15

Args:

16

data: Any Python object (dict, list, str, etc.)

17

hasher: Hash algorithm (NoParam uses 'sha512')

18

base: Output encoding (NoParam uses 'hex')

19

types (bool): Include type information in hash

20

convert (bool): If True, try to convert the data to JSON and hash that instead (can be faster, but the hash may differ from convert=False)

21

extensions: Custom extensions for handling special types

22

23

Returns:

24

str: Hash digest as string

25

26

Note:

27

Data is normalized for consistent hashing across runs.

28

Supports nested structures, numpy arrays, and custom objects.

29

NoParam defaults: hasher='sha512', base='hex'

30

"""

31

32

def hash_file(fpath, blocksize=1048576, stride=1, maxbytes=None, hasher=NoParam, base=NoParam):

33

"""

34

Hash file contents efficiently.

35

36

Args:

37

fpath (str|Path): Path to file

38

blocksize (int): Read block size in bytes (default: 1MB)

39

stride (int): Read every nth block (default: 1 = all blocks)

40

maxbytes (int): Maximum bytes to read (None = entire file)

41

hasher: Hash algorithm (NoParam uses 'sha512')

42

base: Output encoding (NoParam uses 'hex')

43

44

Returns:

45

str: File hash digest

46

47

Raises:

48

FileNotFoundError: File does not exist

49

IOError: Cannot read file

50

51

Note:

52

NoParam defaults: hasher='sha512', base='hex'

53

"""

54

```

55

56

### Module Import Utilities

57

58

Dynamic module importing and path resolution for runtime module loading.

59

60

```python { .api }

61

def import_module_from_name(name, **kwargs):

62

"""

63

Import module by name with error handling.

64

65

Args:

66

name (str): Module name (e.g., 'os.path', 'numpy')

67

**kwargs: Additional import options

68

69

Returns:

70

module: Imported module object

71

72

Raises:

73

ImportError: Module cannot be imported

74

"""

75

76

def import_module_from_path(modpath, index=-1):

77

"""

78

Import module from file path.

79

80

Args:

81

modpath (str|Path): Path to Python file

82

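index (int): Position in sys.path at which the module's directory is inserted if needed (-1 is safest when the module name does not conflict; use 0 on conflict)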

83

84

Returns:

85

module: Imported module object

86

87

Raises:

88

ImportError: Cannot import from path

89

FileNotFoundError: File does not exist

90

"""

91

```

92

93

### Module Path Utilities

94

95

Functions for converting between module names and file paths.

96

97

```python { .api }

98

def modname_to_modpath(modname, **kwargs):

99

"""

100

Convert module name to file path.

101

102

Args:

103

modname (str): Module name (e.g., 'os.path')

104

**kwargs: Additional resolution options

105

106

Returns:

107

str|None: Path to module file or None if not found

108

"""

109

110

def modpath_to_modname(fpath, **kwargs):

111

"""

112

Convert file path to module name.

113

114

Args:

115

fpath (str|Path): Path to Python file

116

**kwargs: Additional conversion options

117

118

Returns:

119

str: Module name

120

"""

121

122

def split_modpath(fpath, **kwargs):

123

"""

124

Split module path into components.

125

126

Args:

127

fpath (str|Path): Path to Python file

128

**kwargs: Additional options

129

130

Returns:

131

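tuple[str, str]: (directory, rel_modpath) - the directory containing the top-level package and the module path relative to that directory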

132

"""

133

```

134

135

## Usage Examples

136

137

### Data Hashing

138

139

```python

140

import ubelt as ub

141

142

# Hash simple data

143

data = {'name': 'Alice', 'age': 30, 'scores': [95, 87, 92]}

144

hash_value = ub.hash_data(data)

145

print(f"Data hash: {hash_value}")

146

147

# Different hash algorithms

148

sha256_hash = ub.hash_data(data, hasher='sha256')

149

md5_hash = ub.hash_data(data, hasher='md5')

150

print(f"SHA256: {sha256_hash}")

151

print(f"MD5: {md5_hash}")

152

153

# Different output encodings

154

hex_hash = ub.hash_data(data, base='hex')

155

abc_hash = ub.hash_data(data, base='abc')

156

print(f"Hex: {hex_hash}")

157

print(f"Alphabetic: {abc_hash}")

158

159

# Truncated hashes

160

short_hash = ub.hash_data(data, hashlen=8)

161

print(f"Short hash: {short_hash}")

162

```

163

164

### Complex Data Hashing

165

166

```python

167

import ubelt as ub

168

import numpy as np

169

170

# Hash complex nested structures

171

complex_data = {

172

'metadata': {

173

'version': '1.0',

174

'created': '2023-01-01'

175

},

176

'arrays': [

177

np.array([1, 2, 3, 4]),

178

np.array([[1, 2], [3, 4]])

179

],

180

'config': {

181

'learning_rate': 0.001,

182

'batch_size': 32,

183

'layers': [128, 64, 32]

184

}

185

}

186

187

hash_value = ub.hash_data(complex_data)

188

print(f"Complex data hash: {hash_value}")

189

190

# Hashing is consistent across runs

191

hash2 = ub.hash_data(complex_data)

192

assert hash_value == hash2 # Same data produces same hash

193

194

# Order-independent hashing for dicts

195

data1 = {'a': 1, 'b': 2}

196

data2 = {'b': 2, 'a': 1}

197

hash1 = ub.hash_data(data1)

198

hash2 = ub.hash_data(data2)

199

assert hash1 == hash2 # Dict order doesn't matter

200

```

201

202

### File Hashing

203

204

```python

205

import ubelt as ub

206

207

# Hash file contents

208

file_path = 'example.txt'

209

with open(file_path, 'w') as f:

210

f.write('Hello, World!')

211

212

file_hash = ub.hash_file(file_path)

213

print(f"File hash: {file_hash}")

214

215

# Hash large files efficiently (uses chunks)

216

large_file_hash = ub.hash_file('large_file.bin', blocksize=65536)

217

218

# Verify file integrity

219

def verify_file(fpath, expected_hash):

220

actual_hash = ub.hash_file(fpath)

221

return actual_hash == expected_hash

222

223

is_valid = verify_file(file_path, file_hash)

224

print(f"File is valid: {is_valid}")

225

226

# Quick hash for caching

227

cache_key = ub.hash_file('config.json', hashlen=8)

228

print(f"Cache key: {cache_key}")

229

```

230

231

### Dynamic Module Importing

232

233

```python

234

import ubelt as ub

235

236

# Import module by name

237

os_module = ub.import_module_from_name('os')

238

print(f"OS name: {os_module.name}")

239

240

# Import submodules

241

path_module = ub.import_module_from_name('os.path')

242

print(f"Current dir: {path_module.abspath('.')}")

243

244

# Safe importing with error handling

245

try:

246

numpy = ub.import_module_from_name('numpy')

247

print("NumPy is available")

248

except ImportError:

249

print("NumPy not installed")

250

251

# Import from file path

252

script_path = 'my_script.py'

253

with open(script_path, 'w') as f:

254

f.write('''

255

def greet(name):

256

return f"Hello, {name}!"

257

258

VERSION = "1.0"

259

''')

260

261

my_module = ub.import_module_from_path(script_path)

262

print(my_module.greet("World"))

263

print(f"Version: {my_module.VERSION}")

264

```

265

266

### Module Path Resolution

267

268

```python

269

import ubelt as ub

270

271

# Convert module name to path

272

os_path = ub.modname_to_modpath('os')

273

print(f"OS module path: {os_path}")

274

275

json_path = ub.modname_to_modpath('json')

276

print(f"JSON module path: {json_path}")

277

278

# Convert path to module name

279

if json_path:

280

module_name = ub.modpath_to_modname(json_path)

281

print(f"Module name: {module_name}")

282

283

# Split module path into components

284

if json_path:

285

components = ub.split_modpath(json_path)

286

print(f"Path components: {components}")

287

288

# Find package modules

289

import sys

290

for path in sys.path:

291

if 'site-packages' in path:

292

print(f"Site packages: {path}")

293

break

294

```

295

296

### Data Integrity and Caching

297

298

```python

299

import ubelt as ub

300

import json

301

302

# Cache with data integrity

303

def cached_computation(data):

304

"""Cache expensive computation with data hash as key"""

305

data_hash = ub.hash_data(data, hashlen=16)

306

cache_file = f'cache_{data_hash}.json'

307

308

try:

309

with open(cache_file, 'r') as f:

310

cached_result = json.load(f)

311

print("Using cached result")

312

return cached_result

313

except FileNotFoundError:

314

print("Computing new result")

315

# Expensive computation

316

result = sum(x**2 for x in data.get('values', []))

317

318

# Cache the result

319

with open(cache_file, 'w') as f:

320

json.dump(result, f)

321

322

return result

323

324

# Test caching

325

data1 = {'values': [1, 2, 3, 4, 5], 'metadata': 'test'}

326

result1 = cached_computation(data1) # Computes new

327

result2 = cached_computation(data1) # Uses cache

328

329

# Different data gets different cache

330

data2 = {'values': [1, 2, 3, 4, 6], 'metadata': 'test'} # Changed last value

331

result3 = cached_computation(data2) # Computes new

332

333

print(f"Results: {result1}, {result2}, {result3}")

334

```

335

336

### File Verification and Checksums

337

338

```python

339

import ubelt as ub

340

341

# Create checksums for multiple files

342

files_to_check = ['file1.txt', 'file2.txt', 'file3.txt']

343

344

# Create test files

345

for i, fname in enumerate(files_to_check):

346

with open(fname, 'w') as f:

347

f.write(f'Content of file {i+1}')

348

349

# Generate checksums

350

checksums = {}

351

for fpath in files_to_check:

352

checksums[fpath] = ub.hash_file(fpath, hasher='sha256', hashlen=16)

353

354

print("File checksums:")

355

for fpath, checksum in checksums.items():

356

print(f"{fpath}: {checksum}")

357

358

# Verify files later

359

def verify_files(expected_checksums):

360

"""Verify files haven't changed"""

361

for fpath, expected in expected_checksums.items():

362

try:

363

actual = ub.hash_file(fpath, hasher='sha256', hashlen=16)

364

if actual == expected:

365

print(f"✓ {fpath} is valid")

366

else:

367

print(f"✗ {fpath} has changed!")

368

except FileNotFoundError:

369

print(f"✗ {fpath} is missing!")

370

371

verify_files(checksums)

372

373

# Modify a file and check again

374

with open('file2.txt', 'a') as f:

375

f.write(' - modified')

376

377

print("\nAfter modification:")

378

verify_files(checksums)

379

```