or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

chooser.md · command-line.md · configuration.md · diff-utilities.md · file-utilities.md · formatters.md · git-integration.md · index.md · main-functions.md · preprocessors.md · verification.md

docs/diff-utilities.md

0

# Diff Processing Utilities

1

2

Utilities for diffing text files and processing diff opcodes to extract changed line numbers and content chunks. These functions form the foundation for comparing original code with reformatted code.

3

4

## Capabilities

5

6

### Diff Generation

7

8

Functions for generating and processing diffs between text documents.

9

10

```python { .api }

11

def diff_and_get_opcodes(src: TextDocument, dst: TextDocument) -> List[Tuple[str, int, int, int, int]]:

12

"""

13

Generate diff opcodes between source and destination documents.

14

15

Divides a diff between the original and reformatted content into

16

alternating chunks of intact (represented by the 'equal' tag) and

17

modified ('delete', 'replace' or 'insert' tag) lines.

18

19

Parameters:

20

- src: Source document (original content)

21

- dst: Destination document (reformatted content)

22

23

Returns:

24

List of opcodes, where each opcode is a tuple of:

25

(tag, src_start, src_end, dst_start, dst_end)

26

27

Example opcode: ('replace', 0, 1, 0, 2) means replace src[0:1] (the first line of src)

28

with dst[0:2] (the first two lines of dst); indices are 0-based and end indices are exclusive

29

"""

30

31

def opcodes_to_chunks(

32

opcodes: List[Tuple[str, int, int, int, int]],

33

src: TextDocument,

34

dst: TextDocument

35

) -> Generator[DiffChunk, None, None]:

36

"""

37

Convert diff opcodes into chunks with line content and offsets.

38

39

Picks the lines from original and reformatted content for each opcode

40

and combines line content with the 1-based line offset in the original content.

41

42

Parameters:

43

- opcodes: List of diff opcodes from diff_and_get_opcodes

44

- src: Source document (original content)

45

- dst: Destination document (reformatted content)

46

47

Yields:

48

DiffChunk objects containing:

49

- original_lines_offset: 1-based line number where chunk starts in original

50

- original_lines: List of original line content

51

- formatted_lines: List of reformatted line content

52

"""

53

```

54

55

### Line Number Extraction

56

57

Functions for extracting changed line numbers from diff opcodes.

58

59

```python { .api }

60

def opcodes_to_edit_linenums(opcodes: List[Tuple[str, int, int, int, int]]) -> List[int]:

61

"""

62

Convert diff opcodes to a list of changed line numbers.

63

64

Parameters:

65

- opcodes: List of diff opcodes

66

67

Returns:

68

List of 1-based line numbers that were changed

69

"""

70

```

71

72

## Usage Examples

73

74

### Basic Diff Processing

75

76

```python

77

from darker.diff import diff_and_get_opcodes, opcodes_to_chunks

78

from darkgraylib.utils import TextDocument

79

80

# Create example documents

81

original = TextDocument.from_lines([

82

'for i in range(5): print(i)',

83

'print("done")'

84

])

85

86

reformatted = TextDocument.from_lines([

87

'for i in range(5):',

88

' print(i)',

89

'print("done")'

90

])

91

92

# Generate diff opcodes

93

opcodes = diff_and_get_opcodes(original, reformatted)

94

print(f"Generated {len(opcodes)} opcodes:")

95

96

for opcode in opcodes:

97

tag, src_start, src_end, dst_start, dst_end = opcode

98

print(f" {tag}: src[{src_start}:{src_end}] -> dst[{dst_start}:{dst_end}]")

99

100

# Convert to chunks

101

chunks = list(opcodes_to_chunks(opcodes, original, reformatted))

102

print(f"\nGenerated {len(chunks)} chunks:")

103

104

for i, chunk in enumerate(chunks):

105

print(f" Chunk {i+1}: starts at line {chunk.original_lines_offset}")

106

print(f" Original: {chunk.original_lines}")

107

print(f" Formatted: {chunk.formatted_lines}")

108

```

109

110

### Extract Changed Line Numbers

111

112

```python

113

from darker.diff import diff_and_get_opcodes, opcodes_to_edit_linenums

114

from darkgraylib.utils import TextDocument

115

116

# Compare git working tree version with formatted version

117

with open("myfile.py") as f:

118

current_content = TextDocument.from_str(f.read())

119

120

# `formatted_content` must be a TextDocument holding the output of a formatter (Black/Ruff/etc.), e.g.:

121

# formatted_content = apply_formatter(current_content)

122

123

# Get diff and extract changed lines

124

opcodes = diff_and_get_opcodes(current_content, formatted_content)

125

changed_lines = opcodes_to_edit_linenums(opcodes)

126

127

if changed_lines:

128

print(f"Formatter would change lines: {changed_lines}")

129

else:

130

print("No formatting changes needed")

131

```

132

133

### Integration with Formatter Output

134

135

```python

136

from darker.diff import diff_and_get_opcodes, opcodes_to_chunks

137

from darker.chooser import choose_lines

138

from darker.git import EditedLinenumsDiffer

139

from darkgraylib.utils import TextDocument

140

from pathlib import Path

141

142

def apply_selective_formatting(file_path: Path, original: TextDocument, formatted: TextDocument, git_edits: List[int]) -> TextDocument:

143

"""Apply formatting only to git-edited regions."""

144

145

# Get diff chunks between original and formatted

146

opcodes = diff_and_get_opcodes(original, formatted)

147

chunks = list(opcodes_to_chunks(opcodes, original, formatted))

148

149

# Choose lines based on git edits

150

selected_lines = list(choose_lines(chunks, git_edits))

151

152

# Reconstruct document

153

return TextDocument.from_lines(selected_lines)

154

155

# Example usage

156

file_path = Path("src/module.py")

157

with open(file_path) as f:

158

original_content = TextDocument.from_str(f.read())

159

160

# Get git edits (implementation depends on git integration)

161

# git_edited_lines = get_git_edited_lines(file_path)

162

163

# Apply formatter (Black, Ruff, etc.)

164

# formatted_content = apply_formatter(original_content)

165

166

# Apply selective formatting

167

# result = apply_selective_formatting(file_path, original_content, formatted_content, git_edited_lines)

168

```

169

170

### Advanced Opcode Analysis

171

172

```python

173

from darker.diff import diff_and_get_opcodes

174

175

def analyze_diff_opcodes(opcodes):

176

"""Analyze diff opcodes to understand change patterns."""

177

stats = {

178

'equal': 0, # unchanged chunks

179

'delete': 0, # deleted lines

180

'insert': 0, # inserted lines

181

'replace': 0 # modified lines

182

}

183

184

for tag, src_start, src_end, dst_start, dst_end in opcodes:

185

stats[tag] += 1

186

187

if tag == 'equal':

188

print(f"Unchanged: {src_end - src_start} lines")

189

elif tag == 'delete':

190

print(f"Deleted: lines {src_start+1}-{src_end}")

191

elif tag == 'insert':

192

print(f"Inserted: {dst_end - dst_start} lines at position {src_start+1}")

193

elif tag == 'replace':

194

print(f"Replaced: lines {src_start+1}-{src_end} with {dst_end - dst_start} lines")

195

196

return stats

197

198

# Usage

199

opcodes = diff_and_get_opcodes(original_doc, formatted_doc)

200

stats = analyze_diff_opcodes(opcodes)

201

print(f"Diff summary: {stats}")

202

```