0
# Copy-Paste Detection
1
2
This module provides lexical analysis for detecting duplicated code in Apex codebases. It integrates with PMD's Copy-Paste Detection (CPD) framework to identify similar code blocks.
3
4
## Capabilities
5
6
### CPD Lexer
7
8
Tokenizes Apex code for duplicate detection analysis.
9
10
```java { .api }
11
/**
12
* Lexer for copy-paste detection in Apex code
13
* Tokenizes Apex source code for duplicate block detection
14
*/
15
public class ApexCpdLexer {
16
/**
17
* Tokenize Apex code for duplicate detection
18
* @param textDocument - Source code document to tokenize
19
* @param tokenFactory - Factory for creating tokens
20
*/
21
public void tokenize(TextDocument textDocument, TokenFactory tokenFactory);
22
}
23
```
24
25
### Token Factory
26
27
Factory interface through which the lexer records tokens during lexical analysis.
28
29
```java { .api }
30
/**
31
* Factory for creating tokens during CPD analysis
32
* Part of PMD core framework
33
*/
34
public interface TokenFactory {
35
/**
36
* Create a token for the given text and location
37
* @param image - Token text content
38
* @param beginLine - Starting line number
39
* @param beginColumn - Starting column number
40
* @param endLine - Ending line number
41
* @param endColumn - Ending column number
42
*/
43
void recordToken(String image, int beginLine, int beginColumn, int endLine, int endColumn);
44
}
45
```
46
47
### Text Document
48
49
Represents a source code document for analysis.
50
51
```java { .api }
52
/**
53
* Represents a source code document for analysis
54
* Part of PMD core framework
55
*/
56
public interface TextDocument {
57
/** Get the full text content */
58
String getText();
59
60
/** Get the file name or identifier */
61
String getDisplayName();
62
63
/** Get character at specific position */
64
char charAt(int index);
65
66
/** Get length of document */
67
int getLength();
68
}
69
```
70
71
**Usage Examples:**
72
73
```java
74
import net.sourceforge.pmd.lang.apex.cpd.ApexCpdLexer;
75
import net.sourceforge.pmd.cpd.*;
76
77
// Create CPD lexer for Apex
78
ApexCpdLexer lexer = new ApexCpdLexer();
79
80
// Create token factory for collecting tokens
81
List<Token> tokens = new ArrayList<>();
82
TokenFactory tokenFactory = new TokenFactory() {
83
@Override
84
public void recordToken(String image, int beginLine, int beginColumn, int endLine, int endColumn) {
85
Token token = new Token(image, beginLine, beginColumn, endLine, endColumn);
86
tokens.add(token);
87
}
88
};
89
90
// Create text document from Apex source
91
String apexCode = """
92
public class MyClass {
93
public void method1() {
94
System.debug('Hello World');
95
Integer x = 5;
96
if (x > 0) {
97
System.debug('Positive');
98
}
99
}
100
101
public void method2() {
102
System.debug('Hello World');
103
Integer y = 10;
104
if (y > 0) {
105
System.debug('Positive');
106
}
107
}
108
}
109
""";
110
111
TextDocument document = new TextDocument() {
112
@Override
113
public String getText() { return apexCode; }
114
115
@Override
116
public String getDisplayName() { return "MyClass.cls"; }
117
118
@Override
119
public char charAt(int index) { return apexCode.charAt(index); }
120
121
@Override
122
public int getLength() { return apexCode.length(); }
123
};
124
125
// Tokenize the document
126
lexer.tokenize(document, tokenFactory);
127
128
// Tokens are now available for duplicate detection
129
System.out.println("Generated " + tokens.size() + " tokens for CPD analysis");
130
131
// Use with PMD CPD framework
132
CPDConfiguration config = new CPDConfiguration();
133
config.setMinimumTileSize(50); // Minimum duplicate block size
134
config.setLanguage(ApexLanguageModule.getInstance());
135
136
CPD cpd = new CPD(config);
137
cpd.add(document);
138
139
// Find duplicates
140
List<Mark> duplicates = cpd.getMatches();
141
for (Mark duplicate : duplicates) {
142
System.out.println("Found duplicate code at line " + duplicate.getBeginLine() +
143
" in " + duplicate.getTokenSrcID());
144
}
145
146
// Integration with language module
147
ApexLanguageModule apexModule = ApexLanguageModule.getInstance();
148
LanguagePropertyBundle properties = apexModule.newPropertyBundle();
149
Lexer cpdLexer = apexModule.createCpdLexer(properties);
150
151
// The returned lexer is an ApexCpdLexer instance
152
assert cpdLexer instanceof ApexCpdLexer;
153
```