0
# Core Parsing
1
2
The core parsing API converts Markdown text into a structured Abstract Syntax Tree (AST). The parser implements the full CommonMark specification and can be customized through a builder that accepts extensions, custom block and inline parsers, delimiter processors, and post-processors.
3
4
## Capabilities
5
6
### Parser
7
8
Main parsing class that converts CommonMark text to AST nodes.
9
10
```java { .api }
11
/**
12
* Parses input text to a tree of nodes.
13
* Thread-safe parser that can be reused across multiple parsing operations.
14
*/
15
public class Parser {
16
/**
17
* Create a new builder for configuring a Parser
18
* @return a builder instance
19
*/
20
public static Builder builder();
21
22
/**
23
* Parse the specified input text into a tree of nodes.
24
* This method is thread-safe (a new parser state is used for each invocation).
25
* @param input the text to parse - must not be null
26
* @return the root node (Document)
27
*/
28
public Node parse(String input);
29
30
/**
31
* Parse the specified reader into a tree of nodes. The caller is responsible for closing the reader.
32
* This method is thread-safe (a new parser state is used for each invocation).
33
* @param input the reader to parse - must not be null
34
* @return the root node (Document)
35
* @throws IOException when reading throws an exception
36
*/
37
public Node parseReader(Reader input) throws IOException;
38
}
39
```
40
41
**Usage Examples:**
42
43
```java
44
import org.commonmark.parser.Parser;
45
import org.commonmark.node.Node;
46
47
// Basic parsing
48
Parser parser = Parser.builder().build();
49
Node document = parser.parse("# Hello World\n\nThis is **bold** text.");
50
51
// Parse from file
52
try (FileReader reader = new FileReader("document.md")) {
53
Node fileDocument = parser.parseReader(reader);
54
}
55
56
// Parse with custom configuration
57
Parser customParser = Parser.builder()
58
.includeSourceSpans(IncludeSourceSpans.BLOCKS)
59
.build();
60
Node documentWithSpans = customParser.parse("# Heading");
61
```
62
63
### Parser Builder
64
65
Builder class for configuring Parser instances with custom options and extensions.
66
67
```java { .api }
68
/**
69
* Builder for configuring a Parser
70
*/
71
public static class Parser.Builder {
72
/**
73
* Build the configured Parser instance
74
* @return the configured Parser
75
*/
76
public Parser build();
77
78
/**
79
* Add extensions to use on this parser
80
* @param extensions extensions to use on this parser
81
* @return this builder
82
*/
83
public Builder extensions(Iterable<? extends Extension> extensions);
84
85
/**
86
* Describe the list of markdown features the parser will recognize and parse.
87
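* By default, all core CommonMark block types are enabled: Heading, HtmlBlock, ThematicBreak,
* FencedCodeBlock, IndentedCodeBlock, BlockQuote and ListBlock. Paragraphs are always parsed.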
88
* @param enabledBlockTypes A set of block nodes the parser will parse
89
* @return this builder
90
*/
91
public Builder enabledBlockTypes(Set<Class<? extends Block>> enabledBlockTypes);
92
93
/**
94
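* Configure whether source spans are calculated for parsed nodes (see Node#getSourceSpans()).
* Source spans are disabled by default (IncludeSourceSpans.NONE).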
95
* @param includeSourceSpans which kind of source spans should be included
96
* @return this builder
97
*/
98
public Builder includeSourceSpans(IncludeSourceSpans includeSourceSpans);
99
100
/**
101
* Add a custom block parser factory
102
* @param blockParserFactory a block parser factory implementation
103
* @return this builder
104
*/
105
public Builder customBlockParserFactory(BlockParserFactory blockParserFactory);
106
107
/**
108
* Add a custom delimiter processor
109
* @param delimiterProcessor a delimiter processor implementation
110
* @return this builder
111
*/
112
public Builder customDelimiterProcessor(DelimiterProcessor delimiterProcessor);
113
114
/**
115
* Add a post processor that runs after parsing
116
* @param postProcessor a post processor implementation
117
* @return this builder
118
*/
119
public Builder postProcessor(PostProcessor postProcessor);
120
121
/**
122
* Override the parser used for inline markdown processing
123
* @param inlineParserFactory an inline parser factory implementation
124
* @return this builder
125
*/
126
public Builder inlineParserFactory(InlineParserFactory inlineParserFactory);
127
}
128
```
129
130
**Usage Examples:**
131
132
```java
133
import org.commonmark.parser.Parser;
134
import org.commonmark.parser.IncludeSourceSpans;
135
import org.commonmark.node.*;
136
137
// Enable only specific block types
138
Set<Class<? extends Block>> enabledTypes = new HashSet<>(Arrays.asList(
139
Heading.class,
140
FencedCodeBlock.class,
141
ListBlock.class
142
));
143
Parser restrictedParser = Parser.builder()
144
.enabledBlockTypes(enabledTypes)
145
.build();
146
147
// Include source span information
148
Parser spanParser = Parser.builder()
149
.includeSourceSpans(IncludeSourceSpans.BLOCKS_AND_INLINES)
150
.build();
151
Node document = spanParser.parse("# Title");
152
List<SourceSpan> spans = document.getSourceSpans();
153
154
// Add custom post-processor
155
Parser processedParser = Parser.builder()
156
.postProcessor(new MyCustomPostProcessor())
157
.build();
158
```
159
160
### Parser Configuration
161
162
Configuration enums and interfaces for customizing parser behavior.
163
164
```java { .api }
165
/**
166
* Configuration for including source span information
167
*/
168
public enum IncludeSourceSpans {
169
/** Do not include source spans */
170
NONE,
171
/** Include source spans on block nodes */
172
BLOCKS,
173
/** Include source spans on all nodes */
174
BLOCKS_AND_INLINES
175
}
176
177
/**
178
* Post-processes nodes after parsing
179
*/
180
public interface PostProcessor {
181
/**
182
* Process the document node after parsing is complete
183
* @param node the document node to process
184
* @return the processed node (may be the same or a new node)
185
*/
186
Node process(Node node);
187
}
188
189
/**
190
* Factory for creating inline parsers
191
*/
192
public interface InlineParserFactory {
193
/**
194
* Create an inline parser with the given context
195
* @param inlineParserContext the context for inline parsing
196
* @return an inline parser instance
197
*/
198
InlineParser create(InlineParserContext inlineParserContext);
199
}
200
201
/**
202
* Parser factory for a block node for determining when a block starts.
203
* Implementations should subclass AbstractBlockParserFactory instead of implementing this directly.
204
*/
205
public interface BlockParserFactory {
206
/**
207
* Try to start parsing a block at the current position
208
* @param state the current parser state
209
* @param matchedBlockParser the matched block parser
210
* @return a BlockStart if this factory can start a block, null otherwise
211
*/
212
BlockStart tryStart(ParserState state, MatchedBlockParser matchedBlockParser);
213
}
214
215
/**
216
* Custom delimiter processor for additional delimiters besides underscore and asterisk.
217
* Implementations must be thread-safe as the same instance may be used by multiple parsers.
218
*/
219
public interface DelimiterProcessor {
220
/**
221
* Get the character that marks the beginning of a delimited node
222
* @return the opening character, must not clash with built-in special characters
223
*/
224
char getOpeningCharacter();
225
226
/**
227
* Get the character that marks the ending of a delimited node
228
* @return the closing character, must not clash with built-in special characters
229
*/
230
char getClosingCharacter();
231
232
/**
233
* Minimum number of delimiter characters needed to activate this processor
234
* @return the minimum length, must be at least 1
235
*/
236
int getMinLength();
237
238
/**
239
* Process the delimiter runs
240
* @param openingRun the opening delimiter run
241
* @param closingRun the closing delimiter run
242
* @return how many delimiters were used; must not be greater than length of either opener or closer
243
*/
244
int process(DelimiterRun openingRun, DelimiterRun closingRun);
245
}
246
```
247
248
### Source Management
249
250
Classes for handling source input and tracking source positions.
251
252
```java { .api }
253
/**
254
* Represents a line from source input
255
*/
256
public class SourceLine {
257
/**
258
* Get the content of this source line
259
* @return the line content
260
*/
261
public CharSequence getContent();
262
263
/**
264
* Get the line number (0-based)
265
* @return the line number
266
*/
267
public int getLineIndex();
268
}
269
270
/**
271
* Set of source lines
272
*/
273
public class SourceLines {
274
/**
275
* Create an empty SourceLines
276
* @return empty SourceLines instance
277
*/
278
public static SourceLines empty();
279
280
/**
281
* Create SourceLines from a single line
282
* @param sourceLine the source line
283
* @return SourceLines containing the single line
284
*/
285
public static SourceLines of(SourceLine sourceLine);
286
287
/**
288
* Add a line to this SourceLines
289
* @param sourceLine the line to add
290
*/
291
public void addLine(SourceLine sourceLine);
292
293
/**
294
* Get all lines
295
* @return list of source lines
296
*/
297
public List<SourceLine> getLines();
298
299
/**
300
* Check if this SourceLines is empty
301
* @return true if empty
302
*/
303
public boolean isEmpty();
304
305
/**
306
* Get the combined content of all lines
307
* @return the content as a string
308
*/
309
public String getContent();
310
}
311
312
/**
313
* References a snippet of text from source input
314
*/
315
public class SourceSpan {
316
/**
317
* Create a source span
318
* @param lineIndex the line index (0-based)
319
* @param columnIndex the column index (0-based)
320
* @param length the length of the span
321
* @return the source span
322
*/
323
public static SourceSpan of(int lineIndex, int columnIndex, int length);
324
325
/**
326
* Get the line index
327
* @return the line index (0-based)
328
*/
329
public int getLineIndex();
330
331
/**
332
* Get the column index
333
* @return the column index (0-based)
334
*/
335
public int getColumnIndex();
336
337
/**
338
* Get the length of the span
339
* @return the length
340
*/
341
public int getLength();
342
}
343
```
344
345
**Usage Examples:**
346
347
```java
348
import org.commonmark.parser.SourceLine;
349
import org.commonmark.parser.SourceLines;
350
import org.commonmark.node.SourceSpan;
351
352
// Working with source spans
353
Parser parser = Parser.builder()
354
.includeSourceSpans(IncludeSourceSpans.BLOCKS_AND_INLINES)
355
.build();
356
357
Node document = parser.parse("# Heading\n\nParagraph with *emphasis*");
358
List<SourceSpan> documentSpans = document.getSourceSpans();
359
360
// Find all heading nodes and their source positions
361
document.accept(new AbstractVisitor() {
362
@Override
363
public void visit(Heading heading) {
364
List<SourceSpan> spans = heading.getSourceSpans();
365
for (SourceSpan span : spans) {
366
System.out.println("Heading at line " + span.getLineIndex() +
367
", column " + span.getColumnIndex());
368
}
369
super.visit(heading);
370
}
371
});
372
```