or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

cli.md comment-handling.md global-licensing.md index.md project-management.md report-generation.md reuse-info.md vcs-integration.md

docs/reuse-info.md

0

# REUSE Information Processing

1

2

The REUSE information processing system provides data structures and functions for handling licensing and copyright information. The core `ReuseInfo` class and supporting utilities enable extraction, manipulation, and analysis of REUSE compliance data.

3

4

## Capabilities

5

6

### Core Data Structures

7

8

The foundational data classes for REUSE information handling.

9

10

```python { .api }

11

@dataclass(frozen=True)

12

class ReuseInfo:

13

"""

14

Simple dataclass holding licensing and copyright information.

15

16

Attributes:

17

spdx_expressions: set[Expression] - SPDX license expressions

18

copyright_lines: set[str] - Copyright statements

19

contributor_lines: set[str] - Contributor information

20

path: Optional[str] - File path this info applies to

21

source_path: Optional[str] - Source file where info was found

22

source_type: Optional[SourceType] - Type of source containing the info

23

"""

24

spdx_expressions: set[Expression] = field(default_factory=set)

25

copyright_lines: set[str] = field(default_factory=set)

26

contributor_lines: set[str] = field(default_factory=set)

27

path: Optional[str] = None

28

source_path: Optional[str] = None

29

source_type: Optional[SourceType] = None

30

```

31

32

```python { .api }

33

class SourceType(Enum):

34

"""

35

Enumeration representing types of sources for license information.

36

"""

37

DOT_LICENSE = "dot-license" # A .license file containing license information

38

FILE_HEADER = "file-header" # A file header containing license information

39

DEP5 = "dep5" # A .reuse/dep5 file containing license information

40

REUSE_TOML = "reuse-toml" # A REUSE.toml file containing license information

41

```

42

43

### ReuseInfo Manipulation

44

45

Methods for creating, copying, and combining ReuseInfo instances.

46

47

```python { .api }

48

def copy(self, **kwargs: Any) -> ReuseInfo:

49

"""

50

Return a copy of ReuseInfo, replacing the values of attributes with

51

the values from kwargs.

52

53

Args:

54

**kwargs: Attribute values to replace

55

56

Returns:

57

New ReuseInfo instance with updated attributes

58

59

Raises:

60

KeyError: If kwargs contains non-existent attributes

61

"""

62

63

def union(self, value: ReuseInfo) -> ReuseInfo:

64

"""

65

Return a new instance of ReuseInfo where all set attributes are equal

66

to the union of the set in self and the set in value.

67

68

All non-set attributes are set to their values in self.

69

70

Args:

71

value: ReuseInfo instance to union with

72

73

Returns:

74

New ReuseInfo instance with combined data

75

"""

76

77

def __or__(self, value: ReuseInfo) -> ReuseInfo:

78

"""Union operator support (| operator)."""

79

return self.union(value)

80

```

81

82

**Usage Examples:**

83

84

```python

85

from reuse import ReuseInfo, SourceType

86

87

# Create basic ReuseInfo

88

info1 = ReuseInfo(

89

copyright_lines={"2023 Jane Doe"},

90

source_path="example.py",

91

source_type=SourceType.FILE_HEADER

92

)

93

94

# Create another with different data

95

info2 = ReuseInfo(

96

copyright_lines={"2023 John Smith"},

97

spdx_expressions={"MIT"}

98

)

99

100

# Copy with modifications

101

modified_info = info1.copy(

102

copyright_lines={"2024 Jane Doe"},

103

spdx_expressions={"Apache-2.0"}

104

)

105

106

# Union two ReuseInfo instances

107

combined = info1.union(info2)

108

print(f"Combined copyrights: {combined.copyright_lines}")

109

# Output: Combined copyrights: {'2023 Jane Doe', '2023 John Smith'} (set order may vary)

110

111

# Using union operator

112

combined_alt = info1 | info2 # Same as info1.union(info2)

113

```

114

115

### Content Analysis Methods

116

117

Methods for analyzing ReuseInfo content and compliance status.

118

119

```python { .api }

120

def contains_copyright_or_licensing(self) -> bool:

121

"""

122

Check if either spdx_expressions or copyright_lines is non-empty.

123

124

Returns:

125

True if the instance contains copyright or licensing information

126

"""

127

128

def contains_copyright_xor_licensing(self) -> bool:

129

"""

130

Check if exactly one of spdx_expressions or copyright_lines is non-empty.

131

132

Returns:

133

True if contains exactly one type of information (copyright XOR licensing)

134

"""

135

136

def contains_info(self) -> bool:

137

"""

138

Check if any field except path, source_path and source_type is non-empty.

139

140

Returns:

141

True if the instance contains any substantive REUSE information

142

"""

143

144

def __bool__(self) -> bool:

145

"""

146

Check if any attributes have values.

147

148

Returns:

149

True if any attribute is truthy

150

"""

151

```

152

153

**Usage Examples:**

154

155

```python

156

# Create ReuseInfo instances for testing

157

empty_info = ReuseInfo()

158

copyright_only = ReuseInfo(copyright_lines={"2023 Jane Doe"})

159

license_only = ReuseInfo(spdx_expressions={"MIT"})

160

complete_info = ReuseInfo(

161

copyright_lines={"2023 Jane Doe"},

162

spdx_expressions={"MIT"}

163

)

164

165

# Test content analysis methods

166

print(f"Empty has info: {empty_info.contains_info()}") # False

167

print(f"Copyright only has copyright or license: {copyright_only.contains_copyright_or_licensing()}") # True

168

print(f"License only has copyright XOR license: {license_only.contains_copyright_xor_licensing()}") # True

169

print(f"Complete info has copyright or license: {complete_info.contains_copyright_or_licensing()}") # True

170

print(f"Complete info has copyright XOR license: {complete_info.contains_copyright_xor_licensing()}") # False

171

172

# Boolean evaluation

173

print(f"Empty info is truthy: {bool(empty_info)}") # False

174

print(f"Complete info is truthy: {bool(complete_info)}") # True

175

```

176

177

### Content Extraction Functions

178

179

Functions for extracting REUSE information from text content and files.

180

181

```python { .api }

182

def extract_reuse_info(text: str) -> ReuseInfo:

183

"""

184

Extract REUSE info from text content.

185

186

Args:

187

text: Text content to analyze for REUSE information

188

189

Returns:

190

ReuseInfo instance containing extracted information

191

192

Note:

193

Searches for SPDX license identifiers, copyright statements,

194

and contributor information using pattern matching.

195

"""

196

197

def reuse_info_of_file(path: Path) -> ReuseInfo:

198

"""

199

Get REUSE info for specific file.

200

201

Args:

202

path: File path to analyze

203

204

Returns:

205

ReuseInfo instance for the file

206

207

Raises:

208

FileNotFoundError: If file doesn't exist

209

UnicodeDecodeError: If file can't be decoded as text

210

"""

211

212

def contains_reuse_info(text: str) -> bool:

213

"""

214

Check if text contains REUSE information.

215

216

Args:

217

text: Text content to check

218

219

Returns:

220

True if text contains REUSE licensing or copyright information

221

"""

222

```

223

224

**Usage Examples:**

225

226

```python

227

from reuse.extract import extract_reuse_info, contains_reuse_info, reuse_info_of_file

228

from pathlib import Path

229

230

# Extract from text content

231

file_content = '''

232

# SPDX-FileCopyrightText: 2023 Jane Doe <jane@example.com>

233

# SPDX-License-Identifier: MIT

234

235

def hello_world():

236

print("Hello, World!")

237

'''

238

239

info = extract_reuse_info(file_content)

240

print(f"Extracted licenses: {info.spdx_expressions}")

241

print(f"Extracted copyrights: {info.copyright_lines}")

242

243

# Check if content has REUSE info

244

has_info = contains_reuse_info(file_content)

245

print(f"Contains REUSE info: {has_info}")

246

247

# Extract from file

248

if Path("example.py").exists():

249

file_info = reuse_info_of_file(Path("example.py"))

250

print(f"File REUSE info: {file_info}")

251

```

252

253

### Text Processing Utilities

254

255

Utility functions for processing and manipulating text content.

256

257

```python { .api }

258

def find_spdx_tag(text: str, pattern: re.Pattern) -> Iterator[str]:

259

"""

260

Find SPDX tags in text using regex pattern.

261

262

Args:

263

text: Text to search

264

pattern: Compiled regex pattern for SPDX tags

265

266

Yields:

267

str: SPDX tag values found in text

268

"""

269

270

def filter_ignore_block(text: str) -> str:

271

"""

272

Filter ignored blocks from text.

273

274

Args:

275

text: Input text potentially containing ignore blocks

276

277

Returns:

278

Text with ignore blocks removed

279

280

Note:

281

Removes sections marked with REUSE-IgnoreStart/REUSE-IgnoreEnd comments.

282

"""

283

284

def detect_line_endings(text: str) -> str:

285

"""

286

Detect line ending style in text.

287

288

Args:

289

text: Text content to analyze

290

291

Returns:

292

Line ending character(s) detected ('\\n', '\\r\\n', or '\\r')

293

"""

294

```

295

296

**Usage Examples:**

297

298

```python

299

import re

300

from reuse.extract import find_spdx_tag, filter_ignore_block, detect_line_endings

301

302

# Find SPDX license identifiers

303

license_pattern = re.compile(r'SPDX-License-Identifier:\s*([^\n\r]*)')

304

text_with_licenses = "SPDX-License-Identifier: MIT\nSPDX-License-Identifier: GPL-3.0"

305

306

for license_id in find_spdx_tag(text_with_licenses, license_pattern):

307

print(f"Found license: {license_id}")

308

309

# Filter ignore blocks

310

text_with_ignore = '''

311

Some content

312

# REUSE-IgnoreStart

313

This content should be ignored

314

# REUSE-IgnoreEnd

315

More content

316

'''

317

318

filtered = filter_ignore_block(text_with_ignore)

319

print(f"Filtered text: {filtered}")

320

321

# Detect line endings

322

unix_text = "Line 1\nLine 2\n"

323

windows_text = "Line 1\r\nLine 2\r\n"

324

325

print(f"Unix endings: {repr(detect_line_endings(unix_text))}") # '\n'

326

print(f"Windows endings: {repr(detect_line_endings(windows_text))}") # '\r\n'

327

```

328

329

### Binary File Handling

330

331

Functions for handling binary files and extracting text content.

332

333

```python { .api }

334

def decoded_text_from_binary(binary_data: bytes) -> str:

335

"""

336

Extract text from binary file data.

337

338

Args:

339

binary_data: Raw binary data from file

340

341

Returns:

342

Decoded text content

343

344

Raises:

345

UnicodeDecodeError: If binary data cannot be decoded as text

346

347

Note:

348

Attempts multiple encoding strategies (UTF-8, Latin-1, etc.)

349

and handles byte order marks (BOM).

350

"""

351

```

352

353

**Usage Examples:**

354

355

```python

356

from reuse.extract import decoded_text_from_binary, extract_reuse_info

357

358

# Read binary file and decode

359

with open("example.py", "rb") as f:

360

binary_data = f.read()

361

362

try:

363

text_content = decoded_text_from_binary(binary_data)

364

# Now extract REUSE info from text

365

info = extract_reuse_info(text_content)

366

except UnicodeDecodeError:

367

print("File is not text or uses unsupported encoding")

368

```

369

370

## Complete REUSE Information Processing Example

371

372

```python

373

from reuse import ReuseInfo, SourceType

374

from reuse.extract import extract_reuse_info, contains_reuse_info

375

from pathlib import Path

376

377

def process_file_reuse_info(file_path: Path) -> dict:

378

"""Complete example of processing REUSE information."""

379

380

result = {

381

"file": str(file_path),

382

"has_reuse_info": False,

383

"licenses": [],

384

"copyrights": [],

385

"contributors": [],

386

"compliance_status": "unknown"

387

}

388

389

try:

390

# Read file content

391

with open(file_path, 'r', encoding='utf-8') as f:

392

content = f.read()

393

394

# Check if file contains REUSE info

395

if not contains_reuse_info(content):

396

result["compliance_status"] = "missing_info"

397

return result

398

399

# Extract REUSE information

400

info = extract_reuse_info(content)

401

402

if info.contains_info():

403

result["has_reuse_info"] = True

404

result["licenses"] = list(str(expr) for expr in info.spdx_expressions)

405

result["copyrights"] = list(info.copyright_lines)

406

result["contributors"] = list(info.contributor_lines)

407

408

# Determine compliance status

409

if info.contains_copyright_or_licensing():

410

if info.spdx_expressions and info.copyright_lines:

411

result["compliance_status"] = "compliant"

412

elif info.contains_copyright_xor_licensing():

413

result["compliance_status"] = "partial"

414

else:

415

result["compliance_status"] = "missing_info"

416

else:

417

result["compliance_status"] = "missing_info"

418

419

except (FileNotFoundError, UnicodeDecodeError) as e:

420

result["error"] = str(e)

421

result["compliance_status"] = "error"

422

423

return result

424

425

# Usage

426

file_analysis = process_file_reuse_info(Path("src/example.py"))

427

print(f"File: {file_analysis['file']}")

428

print(f"Compliance: {file_analysis['compliance_status']}")

429

print(f"Licenses: {file_analysis['licenses']}")

430

print(f"Copyrights: {file_analysis['copyrights']}")

431

```