or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

ast.md · cpd.md · index.md · language-module.md · metrics.md · multifile.md · rules.md

docs/cpd.md

0

# Copy-Paste Detection

1

2

Lexical analysis for detecting code duplication within Apex codebases. Integrates with PMD's Copy-Paste Detection (CPD) framework to identify similar code blocks.

3

4

## Capabilities

5

6

### CPD Lexer

7

8

Tokenizes Apex code for duplicate detection analysis.

9

10

```java { .api }

11

/**

12

* Lexer for copy-paste detection in Apex code

13

* Tokenizes Apex source code for duplicate block detection

14

*/

15

public class ApexCpdLexer {

16

/**

17

* Tokenize Apex code for duplicate detection

18

* @param textDocument - Source code document to tokenize

19

* @param tokenFactory - Factory for creating tokens

20

*/

21

public void tokenize(TextDocument textDocument, TokenFactory tokenFactory);

22

}

23

```

24

25

### Token Factory

26

27

Factory interface for creating tokens during lexical analysis.

28

29

```java { .api }

30

/**

31

* Factory for creating tokens during CPD analysis

32

* Part of PMD core framework

33

*/

34

public interface TokenFactory {

35

/**

36

* Create a token for the given text and location

37

* @param image - Token text content

38

* @param beginLine - Starting line number

39

* @param beginColumn - Starting column number

40

* @param endLine - Ending line number

41

* @param endColumn - Ending column number

42

*/

43

void recordToken(String image, int beginLine, int beginColumn, int endLine, int endColumn);

44

}

45

```

46

47

### Text Document

48

49

Represents a source code document for analysis.

50

51

```java { .api }

52

/**

53

* Represents a source code document for analysis

54

* Part of PMD core framework

55

*/

56

public interface TextDocument {

57

/** Get the full text content */

58

String getText();

59

60

/** Get the file name or identifier */

61

String getDisplayName();

62

63

/** Get character at specific position */

64

char charAt(int index);

65

66

/** Get length of document */

67

int getLength();

68

}

69

```

70

71

**Usage Examples:**

72

73

```java

74

import net.sourceforge.pmd.lang.apex.cpd.ApexCpdLexer;

75

import net.sourceforge.pmd.cpd.*;

76

77

// Create CPD lexer for Apex

78

ApexCpdLexer lexer = new ApexCpdLexer();

79

80

// Create token factory for collecting tokens

81

List<Token> tokens = new ArrayList<>();

82

TokenFactory tokenFactory = new TokenFactory() {

83

@Override

84

public void recordToken(String image, int beginLine, int beginColumn, int endLine, int endColumn) {

85

Token token = new Token(image, beginLine, beginColumn, endLine, endColumn);

86

tokens.add(token);

87

}

88

};

89

90

// Create text document from Apex source

91

String apexCode = """

92

public class MyClass {

93

public void method1() {

94

System.debug('Hello World');

95

Integer x = 5;

96

if (x > 0) {

97

System.debug('Positive');

98

}

99

}

100

101

public void method2() {

102

System.debug('Hello World');

103

Integer y = 10;

104

if (y > 0) {

105

System.debug('Positive');

106

}

107

}

108

}

109

""";

110

111

TextDocument document = new TextDocument() {

112

@Override

113

public String getText() { return apexCode; }

114

115

@Override

116

public String getDisplayName() { return "MyClass.cls"; }

117

118

@Override

119

public char charAt(int index) { return apexCode.charAt(index); }

120

121

@Override

122

public int getLength() { return apexCode.length(); }

123

};

124

125

// Tokenize the document

126

lexer.tokenize(document, tokenFactory);

127

128

// Tokens are now available for duplicate detection

129

System.out.println("Generated " + tokens.size() + " tokens for CPD analysis");

130

131

// Use with PMD CPD framework

132

CPDConfiguration config = new CPDConfiguration();

133

config.setMinimumTileSize(50); // Minimum duplicate block size

134

config.setLanguage(ApexLanguageModule.getInstance());

135

136

CPD cpd = new CPD(config);

137

cpd.add(document);

138

139

// Find duplicates

140

List<Mark> duplicates = cpd.getMatches();

141

for (Mark duplicate : duplicates) {

142

System.out.println("Found duplicate code at line " + duplicate.getBeginLine() +

143

" in " + duplicate.getTokenSrcID());

144

}

145

146

// Integration with language module

147

ApexLanguageModule apexModule = ApexLanguageModule.getInstance();

148

LanguagePropertyBundle properties = apexModule.newPropertyBundle();

149

Lexer cpdLexer = apexModule.createCpdLexer(properties);

150

151

// The returned lexer is an instance of ApexCpdLexer

152

assert cpdLexer instanceof ApexCpdLexer;

153

```