or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

cli.md configuration.md file-processing.md formatting.md index.md individual-fixes.md text-fixing.md utilities.md

docs/cli.md

0

# Command Line Interface

1

2

Command-line tool for batch text processing with configurable options for encoding, normalization, and entity handling.

3

4

## Capabilities

5

6

### Command Line Entry Point

7

8

Main function providing command-line access to ftfy text processing.

9

10

```python { .api }

11

def main() -> None:

12

"""

13

Run ftfy as command-line utility.

14

15

Processes files or standard input with configurable text fixing options.

16

Handles encoding detection, normalization settings, and HTML entity processing.

17

18

Command line usage:

19

ftfy [filename] [options]

20

21

Options:

22

-o, --output: Output file (default: stdout)

23

-g, --guess: Guess input encoding (risky)

24

-e, --encoding: Specify input encoding (default: utf-8)

25

-n, --normalization: Unicode normalization (default: NFC)

26

--preserve-entities: Don't decode HTML entities

27

28

Examples:

29

ftfy input.txt -o output.txt

30

ftfy -g mystery.txt

31

cat file.txt | ftfy > cleaned.txt

32

"""

33

```

34

35

## Command Line Usage

36

37

### Basic File Processing

38

39

```bash

40

# Fix a single file, output to stdout

41

ftfy broken_text.txt

42

43

# Fix file and save to new file

44

ftfy input.txt -o fixed_output.txt

45

46

# Process standard input

47

cat messy_file.txt | ftfy > clean_file.txt

48

echo "âœ” mojibake" | ftfy

49

```

50

51

### Encoding Options

52

53

```bash

54

# Specify input encoding explicitly

55

ftfy --encoding latin-1 oldfile.txt

56

57

# Let ftfy guess the encoding (not recommended)

58

ftfy --guess mystery_encoding.txt

59

60

# Process file with unknown encoding

61

ftfy -g -o output.txt unknown_file.txt

62

```

63

64

### Normalization and Entity Options

65

66

```bash

67

# Disable Unicode normalization

68

ftfy --normalization none input.txt

69

70

# Use NFD normalization instead of default NFC

71

ftfy --normalization NFD input.txt

72

73

# Preserve HTML entities (don't decode them)

74

ftfy --preserve-entities html_file.txt

75

76

# Combine options

77

ftfy -e latin-1 -n NFD --preserve-entities input.txt -o output.txt

78

```

79

80

### Batch Processing Examples

81

82

```bash

83

# Process all .txt files in directory

84

for file in *.txt; do

85

ftfy "$file" -o "fixed_$file"

86

done

87

88

# Process files preserving directory structure

89

find . -name "*.txt" -exec sh -c 'ftfy "$1" -o "${1%.txt}_fixed.txt"' _ {} \;

90

91

# Process with encoding detection for mixed files

92

find . -name "*.txt" -exec ftfy -g -o {}.fixed {} \;

93

```

94

95

## Python API Access

96

97

You can also access CLI functionality programmatically:

98

99

```python

100

from ftfy.cli import main

101

import sys

102

103

# Simulate command line arguments

104

sys.argv = ['ftfy', 'input.txt', '-o', 'output.txt', '--encoding', 'latin-1']

105

main()

106

```

107

108

## Usage Examples from Python

109

110

### Replicating CLI Behavior

111

112

```python

113

from ftfy import fix_file, TextFixerConfig

114

import sys

115

116

def cli_equivalent(input_file, output_file=None, encoding='utf-8',

117

normalization='NFC', preserve_entities=False, guess=False):

118

"""Replicate CLI behavior in Python."""

119

120

if guess:

121

encoding = None

122

123

unescape_html = False if preserve_entities else "auto"

124

normalization = None if normalization.lower() == 'none' else normalization

125

126

config = TextFixerConfig(

127

unescape_html=unescape_html,

128

normalization=normalization

129

)

130

131

# Open input file

132

if input_file == '-':

133

infile = sys.stdin.buffer

134

else:

135

infile = open(input_file, 'rb')

136

137

# Open output file

138

if output_file is None or output_file == '-':

139

outfile = sys.stdout

140

else:

141

outfile = open(output_file, 'w', encoding='utf-8')

142

143

try:

144

for line in fix_file(infile, encoding=encoding, config=config):

145

outfile.write(line)

146

finally:

147

if input_file != '-':

148

infile.close()

149

if output_file not in (None, '-'):

150

outfile.close()

151

152

# Usage examples

153

cli_equivalent('messy.txt', 'clean.txt')

154

cli_equivalent('latin1.txt', encoding='latin-1', preserve_entities=True)

155

cli_equivalent('unknown.txt', guess=True)

156

```

157

158

### Error Handling

159

160

The CLI handles various error conditions:

161

162

```python

163

import sys

164

from ftfy.cli import main

165

166

# Test error conditions

167

test_cases = [

168

# Same input and output file

169

['ftfy', 'test.txt', '-o', 'test.txt'],

170

171

# Invalid encoding

172

['ftfy', 'test.txt', '-e', 'invalid-encoding'],

173

174

# Non-existent input file

175

['ftfy', 'nonexistent.txt']

176

]

177

178

for args in test_cases:

179

print(f"Testing: {' '.join(args)}")

180

sys.argv = args

181

try:

182

main()

183

print("Success")

184

except SystemExit as e:

185

print(f"Exit code: {e.code}")

186

except Exception as e:

187

print(f"Error: {e}")

188

print()

189

```

190

191

### Integration with Shell Scripts

192

193

```bash

194

#!/bin/bash

195

# Script to clean up text files from various sources

196

197

FTFY_OPTIONS="--encoding utf-8 --normalization NFC"

198

199

# Function to process file with error handling

200

process_file() {

201

local input="$1"

202

local output="$2"

203

204

if ftfy $FTFY_OPTIONS "$input" -o "$output" 2>/dev/null; then

205

echo "✓ Processed: $input → $output"

206

else

207

echo "✗ Failed to process: $input"

208

# Try with encoding detection as fallback

209

if ftfy --guess "$input" -o "$output" 2>/dev/null; then

210

echo "✓ Processed with encoding detection: $input → $output"

211

else

212

echo "✗ Complete failure: $input"

213

return 1

214

fi

215

fi

216

}

217

218

# Process all text files

219

find . -name "*.txt" | while read file; do

220

process_file "$file" "${file%.txt}_clean.txt"

221

done

222

```

223

224

### Pipeline Integration

225

226

```bash

227

# Integration with common text processing pipelines

228

229

# Clean web scraping results

230

curl -s "https://example.com" | html2text | ftfy > clean_content.txt

231

232

# Process CSV files with text cleaning

233

csvcut -c description messy_data.csv | ftfy > clean_descriptions.txt

234

235

# Clean up log files

236

tail -f application.log | ftfy --preserve-entities > clean.log

237

238

# Database export cleaning

239

pg_dump --data-only mytable | ftfy -g > clean_export.sql

240

241

# Clean and normalize for analysis

242

cat survey_responses.txt | ftfy --normalization NFKC > normalized.txt

243

```

244

245

### Advanced CLI Usage

246

247

```bash

248

# Process files with specific configurations for different use cases

249

250

# Web content: preserve HTML entities, normalize for display

251

ftfy --preserve-entities --normalization NFC web_content.txt

252

253

# Database text: aggressive cleaning, compatibility normalization

254

ftfy --normalization NFKC --encoding utf-8 database_dump.txt

255

256

# Log processing: preserve structure, clean terminal escapes

257

ftfy --preserve-entities log_file.txt | grep -v "^\s*$" > clean_log.txt

258

259

# Scientific text: preserve Unicode, minimal normalization

260

ftfy --normalization NFD scientific_paper.txt

261

262

# Legacy system integration: guess encoding, normalize for compatibility

263

ftfy --guess --normalization NFKC legacy_export.txt

264

```