or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

character-parsing.md · combinators.md · core-primitives.md · index.md · parser-generation.md · parser-operators.md

character-parsing.mddocs/

0

# Character and String Parsing

1

2

Specialized parsers for character-level and string-level text processing. These parsers handle literal string matching, regular expression patterns, character class matching, whitespace processing, and end-of-input detection.

3

4

## Capabilities

5

6

### String Parsing

7

8

Parse literal strings and regular expression patterns with full control over matching behavior.

9

10

```python { .api }

11

def string(s):

12

"""

13

Parse a literal string.

14

15

Args:

16

s (str): The exact string to match

17

18

Returns:

19

Parser: Parser that returns the matched string on success

20

21

Note:

22

Fails if the input doesn't exactly match the string s.

23

"""

24

25

def regex(exp, flags=0):

26

"""

27

Parse according to a regular expression pattern.

28

29

Args:

30

exp (str or compiled regex): Regular expression pattern

31

flags (int, optional): Regex flags (re.IGNORECASE, etc.)

32

33

Returns:

34

Parser: Parser that returns the matched text

35

36

Note:

37

Uses re.match() which anchors to current position.

38

Returns the full matched text (group 0).

39

"""

40

```

41

42

### Character Class Parsing

43

44

Parse individual characters based on membership in character sets or character properties.

45

46

```python { .api }

47

def one_of(s):

48

"""

49

Parse a character that appears in the given string.

50

51

Args:

52

s (str): String containing allowed characters

53

54

Returns:

55

Parser: Parser that returns the matched character

56

57

Example:

58

one_of("abc") matches 'a', 'b', or 'c'

59

"""

60

61

def none_of(s):

62

"""

63

Parse a character that does NOT appear in the given string.

64

65

Args:

66

s (str): String containing forbidden characters

67

68

Returns:

69

Parser: Parser that returns the matched character

70

71

Example:

72

none_of("abc") matches any character except 'a', 'b', or 'c'

73

"""

74

75

def letter():

76

"""

77

Parse an alphabetic character.

78

79

Returns:

80

Parser: Parser that returns the matched letter

81

82

Note:

83

Uses Python's str.isalpha() method.

84

"""

85

86

def digit():

87

"""

88

Parse a numeric digit character.

89

90

Returns:

91

Parser: Parser that returns the matched digit character

92

93

Note:

94

Uses Python's str.isdigit() method.

95

"""

96

```

97

98

### Whitespace Parsing

99

100

Handle whitespace characters and common whitespace patterns in text processing.

101

102

```python { .api }

103

def space():

104

"""

105

Parse a single whitespace character.

106

107

Returns:

108

Parser: Parser that returns the matched whitespace character

109

110

Note:

111

Uses Python's str.isspace() method (spaces, tabs, newlines, etc.).

112

"""

113

114

def spaces():

115

"""

116

Parse zero or more whitespace characters.

117

118

Returns:

119

Parser: Parser that returns a list of the matched whitespace characters

120

121

Note:

122

Always succeeds, returns empty list if no whitespace found.

123

Equivalent to many(space()).

124

"""

125

```

126

127

### End-of-Input Detection

128

129

Detect when the parser has reached the end of the input text.

130

131

```python { .api }

132

def eof():

133

"""

134

Parse end-of-file (end of input string).

135

136

Returns:

137

Parser: Parser that returns None when at end of input

138

139

Note:

140

Succeeds only when no more characters remain to be parsed.

141

Useful for ensuring entire input is consumed.

142

"""

143

```

144

145

## Usage Examples

146

147

### String Matching

148

149

```python

150

from parsec import string, regex, ParseError

151

152

# Literal string matching

153

hello = string("hello")

154

result = hello.parse("hello world") # Returns "hello"

155

156

# Case-sensitive matching

157

parser = string("Hello")

158

try:

159

result = parser.parse("hello") # Raises ParseError

160

except ParseError:

161

print("Case mismatch")

162

163

# Regular expression matching

164

number = regex(r'\d+')

165

result = number.parse("123abc") # Returns "123"

166

167

# Regex with flags

168

import re

169

word = regex(r'[a-z]+', re.IGNORECASE)

170

result = word.parse("Hello") # Returns "Hello"

171

```

172

173

### Character Class Matching

174

175

```python

176

from parsec import one_of, none_of, letter, digit

177

178

# Character from set

179

vowel = one_of("aeiou")

180

result = vowel.parse("apple") # Returns 'a'

181

182

# Character not in set

183

consonant = none_of("aeiou")

184

result = consonant.parse("hello") # Returns 'h'

185

186

# Alphabetic characters

187

char = letter()

188

result = char.parse("abc123") # Returns 'a'

189

190

# Numeric digits

191

num = digit()

192

result = num.parse("123abc") # Returns '1'

193

```

194

195

### Whitespace Handling

196

197

```python

198

from parsec import space, spaces, string, generate, many1, letter

199

200

# Single whitespace

201

ws = space()

202

result = ws.parse(" hello") # Returns ' '

203

204

# Multiple whitespace (optional)

205

ws_many = spaces()

206

result = ws_many.parse("   hello") # Returns [' ', ' ', ' ']

207

result = ws_many.parse("hello") # Returns [] (empty list)

208

209

# Parse words separated by whitespace

210

@generate

211

def spaced_words():

212

word1 = yield many1(letter())

213

yield spaces()

214

word2 = yield many1(letter())

215

return ("".join(word1), "".join(word2))

216

217

result = spaced_words.parse("hello world") # Returns ("hello", "world")

218

```

219

220

### Complex String Patterns

221

222

```python

223

from parsec import regex, string, many, many1, letter, digit, none_of, generate

224

225

# Email-like pattern

226

email = regex(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}')

227

result = email.parse("user@example.com") # Returns "user@example.com"

228

229

# Quoted strings

230

@generate

231

def quoted_string():

232

yield string('"')

233

content = yield many(none_of('"'))

234

yield string('"')

235

return "".join(content)

236

237

result = quoted_string.parse('"hello world"') # Returns "hello world"

238

239

# Numbers with optional decimal

240

from parsec import Parser, Value

241

242

@generate

243

def decimal_number():

244

integer_part = yield many1(digit())

245

decimal_part = yield (string('.') >> many1(digit())) ^ Parser(lambda text, index: Value.success(index, []))

246

if decimal_part:

247

return float("".join(integer_part) + "." + "".join(decimal_part))

248

else:

249

return int("".join(integer_part))

250

251

result = decimal_number.parse("123.45") # Returns 123.45

252

result = decimal_number.parse("123") # Returns 123

253

```

254

255

### End-of-Input Validation

256

257

```python

258

from parsec import eof, many1, letter, generate, ParseError

259

260

# Ensure entire input is letters

261

letters_only = many1(letter()) << eof()

262

result = letters_only.parse("hello") # Returns ['h','e','l','l','o']

263

try:

264

result = letters_only.parse("hello123") # Raises ParseError

265

except ParseError:

266

print("Non-letters found")

267

268

# Parse complete tokens

269

@generate

270

def complete_word():

271

chars = yield many1(letter())

272

yield eof() # Ensure nothing follows

273

return "".join(chars)

274

275

result = complete_word.parse("hello") # Returns "hello"

276

```