0
# Link Scanning
1
2
Core link-checking functionality for validating URLs and local files. Provides both promise-based batch processing (`check`) and event-driven real-time scanning (`LinkChecker`), with comprehensive error handling and retry mechanisms.
3
4
## Capabilities
5
6
### Check Function
7
8
Convenience function that performs a complete link scan without manually instantiating the `LinkChecker` class.
9
10
```typescript { .api }
11
/**
12
* Crawl a given url or path, and return a list of visited links along with status codes
13
* @param options - Configuration options for the link checking operation
14
* @returns Promise resolving to crawl results with pass/fail status and individual link results
15
*/
16
function check(options: CheckOptions): Promise<CrawlResult>;
17
```
18
19
**Usage Examples:**
20
21
```typescript
22
import { check, LinkState } from "linkinator";
23
24
// Check a single URL
25
const result = await check({ path: "https://example.com" });
26
console.log(`Scan passed: ${result.passed}`);
27
28
// Check local directory with recursion
29
const localResult = await check({
30
path: "./website/",
31
recurse: true,
32
markdown: true,
33
concurrency: 50,
34
});
35
36
// Check multiple paths
37
const multiResult = await check({
38
path: ["https://example.com", "https://api.example.com"],
39
timeout: 10000,
40
});
41
```
42
43
### LinkChecker Class
44
45
Class providing event-driven link checking with real-time progress updates and detailed control over the scanning process. It extends `EventEmitter` and emits `link`, `pagestart`, and `retry` events while a scan is running.
46
47
```typescript { .api }
48
/**
49
* Instance class used to perform a crawl job with event emission capabilities
50
*/
51
class LinkChecker extends EventEmitter {
52
/**
53
* Crawl given URLs or paths and return comprehensive results
54
* @param options - Check options specifying what and how to scan
55
* @returns Promise resolving to complete crawl results
56
*/
57
check(options: CheckOptions): Promise<CrawlResult>;
58
59
/**
60
* Listen for individual link check results as they complete
61
* @param event - 'link' event type
62
* @param listener - Callback receiving LinkResult for each checked link
63
*/
64
on(event: 'link', listener: (result: LinkResult) => void): this;
65
66
/**
67
* Listen for page scanning start events
68
* @param event - 'pagestart' event type
69
* @param listener - Callback receiving URL of page being scanned
70
*/
71
on(event: 'pagestart', listener: (link: string) => void): this;
72
73
/**
74
* Listen for retry attempts on failed requests
75
* @param event - 'retry' event type
76
* @param listener - Callback receiving retry details including timing
77
*/
78
on(event: 'retry', listener: (details: RetryInfo) => void): this;
79
}
80
```
81
82
**Usage Examples:**
83
84
```typescript
85
import { LinkChecker, LinkState } from "linkinator";
86
87
// Event-driven scanning with progress updates
88
const checker = new LinkChecker();
89
let checkedCount = 0;
90
let brokenCount = 0;
91
92
checker.on('link', (result) => {
93
checkedCount++;
94
if (result.state === LinkState.BROKEN) {
95
brokenCount++;
96
console.log(`Broken link found: ${result.url} (${result.status})`);
97
}
98
console.log(`Progress: ${checkedCount} links checked, ${brokenCount} broken`);
99
});
100
101
checker.on('pagestart', (url) => {
102
console.log(`Scanning page: ${url}`);
103
});
104
105
checker.on('retry', (details) => {
106
console.log(`Retrying ${details.url} in ${details.secondsUntilRetry} seconds`);
107
});
108
109
const result = await checker.check({
110
path: "https://example.com",
111
recurse: true,
112
retry: true,
113
retryErrors: true,
114
});
115
```
116
117
### Link State Management
118
119
Each link result is assigned one of three states (`OK`, `BROKEN`, or `SKIPPED`) so that results are easy to filter and process.
120
121
```typescript { .api }
122
/**
123
* Enumeration of possible link states after checking
124
*/
125
enum LinkState {
126
/** Link is accessible and returned a successful response */
127
OK = 'OK',
128
/** Link is broken, inaccessible, or returned an error response */
129
BROKEN = 'BROKEN',
130
/** Link was skipped due to filtering rules or unsupported protocol */
131
SKIPPED = 'SKIPPED',
132
}
133
```
134
135
**Usage Examples:**
136
137
```typescript
138
import { check, LinkState } from "linkinator";
139
140
const result = await check({ path: "https://example.com" });
141
142
// Filter results by state
143
const okLinks = result.links.filter(link => link.state === LinkState.OK);
144
const brokenLinks = result.links.filter(link => link.state === LinkState.BROKEN);
145
const skippedLinks = result.links.filter(link => link.state === LinkState.SKIPPED);
146
147
console.log(`✓ ${okLinks.length} working links`);
148
console.log(`✗ ${brokenLinks.length} broken links`);
149
console.log(`⊘ ${skippedLinks.length} skipped links`);
150
151
// Process broken links
152
brokenLinks.forEach(link => {
153
console.log(`${link.url} (${link.status}) - found in ${link.parent}`);
154
if (link.failureDetails) {
155
console.log(` Error details: ${link.failureDetails.length} failures`);
156
}
157
});
158
```
159
160
## Types
161
162
```typescript { .api }
163
interface CrawlResult {
164
/** Whether the scan passed (no broken links found) */
165
passed: boolean;
166
/** Array of results for each link that was checked */
167
links: LinkResult[];
168
}
169
170
interface LinkResult {
171
/** The URL that was checked */
172
url: string;
173
/** HTTP status code if available */
174
status?: number;
175
/** Current state of the link (OK/BROKEN/SKIPPED) */
176
state: LinkState;
177
/** Parent URL that contained this link */
178
parent?: string;
179
/** Detailed error information for failed links */
180
failureDetails?: Array<Error | GaxiosResponse>;
181
}
182
183
interface RetryInfo {
184
/** URL being retried */
185
url: string;
186
/** Number of seconds until the retry attempt */
187
secondsUntilRetry: number;
188
/** HTTP status code that triggered the retry */
189
status: number;
190
}
191
192
interface GaxiosResponse {
193
status: number;
194
statusText: string;
195
headers: Record<string, string>;
196
data: any;
197
config: any;
198
request?: any;
199
}
200
201
interface ParsedUrl {
202
/** The original link string that was parsed */
203
link: string;
204
/** Any error that occurred during URL parsing */
205
error?: Error;
206
/** The successfully parsed URL object */
207
url?: URL;
208
}
209
```