# Error Handling

Feedparser provides comprehensive error handling with graceful degradation, allowing feed parsing to continue even when encountering malformed content, encoding issues, or network problems.
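
A minimal sketch of this behavior: even when parsing flags a problem, the partial result is still returned (the URL below is illustrative):

```python
import feedparser

# Illustrative URL pointing at a malformed feed
result = feedparser.parse('https://example.com/broken-feed.xml')

# Problems are reported on the result instead of raised...
print(result.bozo, result.bozo_exception)

# ...while whatever data could be recovered remains available
print(len(result.entries))
```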

## Capabilities

### Exception Classes

Feedparser defines several exception classes for different types of parsing issues:

```python { .api }
class ThingsNobodyCaresAboutButMe(Exception):
    """
    Base exception for minor parsing issues that don't prevent feed processing.
    These exceptions are captured in bozo_exception but don't stop parsing.
    """

class CharacterEncodingOverride(ThingsNobodyCaresAboutButMe):
    """
    Raised when character encoding is overridden during parsing.
    Indicates encoding was detected/specified differently than declared.
    """

class CharacterEncodingUnknown(ThingsNobodyCaresAboutButMe):
    """
    Raised when character encoding cannot be determined.
    Parser will fall back to default encoding handling.
    """

class NonXMLContentType(ThingsNobodyCaresAboutButMe):
    """
    Raised when content type is not XML but parsing continues anyway.
    Common when servers misconfigure content-type headers.
    """

class UndeclaredNamespace(Exception):
    """
    Raised when XML contains undeclared namespaces.
    More serious than other exceptions but parsing may still continue.
    """
```
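
Because the first three classes share the `ThingsNobodyCaresAboutButMe` base, a handler can treat them as one group of minor issues and single out the more serious `UndeclaredNamespace`. A short sketch of that pattern:

```python
import feedparser

result = feedparser.parse(url)

if result.bozo:
    exc = result.bozo_exception
    if isinstance(exc, feedparser.ThingsNobodyCaresAboutButMe):
        # Covers encoding overrides, unknown encodings, and non-XML content types
        print(f"Minor issue, continuing: {exc!r}")
    elif isinstance(exc, feedparser.UndeclaredNamespace):
        # Namespace problems can mean some elements were not recognized
        print(f"Namespace problem, data may be incomplete: {exc!r}")
```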

## Bozo Detection System

Feedparser uses a "bozo" flag system to indicate when feeds have issues while still attempting to parse them:

### Bozo Flag

```python
import feedparser

result = feedparser.parse(url)

# Check if feed had issues
if result.bozo:
    print("Feed had parsing issues")
    print(f"Exception: {result.bozo_exception}")
    print(f"Exception type: {type(result.bozo_exception).__name__}")
else:
    print("Feed parsed cleanly")

# Feed data may still be available even with bozo=True
print(f"Found {len(result.entries)} entries")
```

### Common Bozo Scenarios

```python
import urllib.error
import xml.sax

import feedparser

result = feedparser.parse(problematic_url)

if result.bozo:
    exception = result.bozo_exception

    # Network/HTTP errors
    if isinstance(exception, urllib.error.HTTPError):
        print(f"HTTP {exception.code}: {exception.reason}")
    elif isinstance(exception, urllib.error.URLError):
        print(f"Network error: {exception.reason}")

    # XML parsing errors
    elif isinstance(exception, xml.sax.SAXException):
        print(f"XML parsing error: {exception}")

    # Feedparser-specific issues
    elif isinstance(exception, feedparser.CharacterEncodingUnknown):
        print("Could not determine character encoding")
    elif isinstance(exception, feedparser.NonXMLContentType):
        print("Content type is not XML")
    elif isinstance(exception, feedparser.UndeclaredNamespace):
        print("Feed contains undeclared XML namespaces")

    # Generic issues
    else:
        print(f"Other parsing issue: {exception}")
```

## Error Recovery Strategies

### Graceful Degradation

Feedparser attempts to extract as much data as possible even from problematic feeds:

```python
result = feedparser.parse(malformed_feed_url)

# Always check what data was successfully extracted
print(f"Bozo: {result.bozo}")
print(f"Entries found: {len(result.entries)}")
print(f"Feed title: {result.feed.get('title', 'No title')}")

# Process available data despite errors
if result.entries:
    print("Processing entries despite parsing issues:")
    for entry in result.entries:
        title = entry.get('title', 'No title')
        link = entry.get('link', 'No link')
        print(f"  - {title}: {link}")
```

### Parser Fallback System

Feedparser automatically falls back from strict XML parsing to loose, HTML-style parsing when a feed is not well-formed:

```python
result = feedparser.parse(questionable_feed)

# Check which parser was used
if result.bozo:
    # Likely used the loose parser due to malformed XML
    print("Used tolerant parser for malformed content")
else:
    # Used the strict XML parser
    print("Used strict XML parser")

# Both parsers can produce valid results
if result.version:
    print(f"Detected format: {result.version}")
```
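
One way to confirm the fallback actually happened: when the strict parser rejects a document, recent feedparser versions surface the failure as an `xml.sax.SAXParseException` in `bozo_exception`, which also carries the position of the offending markup. A sketch assuming that behavior:

```python
import xml.sax

import feedparser

result = feedparser.parse(questionable_feed)

if result.bozo and isinstance(result.bozo_exception, xml.sax.SAXParseException):
    # Strict XML parsing failed here; the loose parser produced the result
    exc = result.bozo_exception
    print(f"Strict parse failed at line {exc.getLineNumber()}, column {exc.getColumnNumber()}")
```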

## Error Handling Patterns

### Safe Data Access

Always use safe access patterns when dealing with potentially problematic feeds:

```python
import time

result = feedparser.parse(url)

# Safe feed-level access
feed_title = result.feed.get('title', 'Untitled Feed')
feed_link = result.feed.get('link', '')
feed_description = result.feed.get('description', 'No description')

# Safe entry-level access
for entry in result.entries:
    title = entry.get('title', 'Untitled')
    link = entry.get('link', '#')
    summary = entry.get('summary', 'No summary')

    # Check for date parsing success
    if entry.get('published_parsed'):
        pub_date = time.strftime('%Y-%m-%d', entry.published_parsed)
    else:
        pub_date = entry.get('published', 'Unknown date')

    print(f"{title} ({pub_date}): {summary[:100]}...")
```
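
Note that parse results are `FeedParserDict` objects, which allow both dictionary-style and attribute-style access; only `.get()` is safe when a field may be absent:

```python
import feedparser

result = feedparser.parse(url)
entry = result.entries[0]

try:
    print(entry.title)       # attribute access raises AttributeError if missing
    print(entry['title'])    # key access raises KeyError if missing
except (AttributeError, KeyError):
    print(entry.get('title', 'Untitled'))  # .get() never raises
```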

### Exception Handling Wrapper

```python
import logging
import time

import feedparser

def safe_parse_feed(url, max_retries=3):
    """
    Safely parse a feed with error handling and retries.
    """
    for attempt in range(max_retries):
        try:
            result = feedparser.parse(url)

            # Log parsing issues but continue
            if result.bozo:
                logging.warning(f"Feed parsing issues for {url}: {result.bozo_exception}")

            # Validate minimum data requirements
            if not result.feed and not result.entries:
                logging.error(f"No usable data found in feed: {url}")
                if attempt < max_retries - 1:
                    continue
                return None

            return result

        except Exception as e:
            logging.error(f"Unexpected error parsing {url} (attempt {attempt + 1}): {e}")
            if attempt < max_retries - 1:
                time.sleep(2 ** attempt)  # Exponential backoff

    return None

# Usage
result = safe_parse_feed('https://example.com/problematic-feed.xml')
if result:
    print(f"Successfully parsed feed with {len(result.entries)} entries")
else:
    print("Failed to parse feed after all retries")
```

### Content Validation

```python
def validate_feed_content(result):
    """
    Validate and report on feed content quality.
    """
    issues = []

    # Check parsing status
    if result.bozo:
        issues.append(f"Parsing issues: {result.bozo_exception}")

    # Check feed-level data
    if not result.feed.get('title'):
        issues.append("Feed has no title")

    if not result.feed.get('link'):
        issues.append("Feed has no link")

    # Check entry quality
    if not result.entries:
        issues.append("Feed has no entries")
    else:
        entries_without_titles = sum(1 for e in result.entries if not e.get('title'))
        if entries_without_titles:
            issues.append(f"{entries_without_titles} entries missing titles")

        entries_without_links = sum(1 for e in result.entries if not e.get('link'))
        if entries_without_links:
            issues.append(f"{entries_without_links} entries missing links")

    return issues

# Usage
result = feedparser.parse(url)
issues = validate_feed_content(result)

if issues:
    print("Feed quality issues:")
    for issue in issues:
        print(f"  - {issue}")
else:
    print("Feed appears to be high quality")
```

## Character Encoding Issues

### Encoding Detection and Override

```python
result = feedparser.parse(url)

# Check encoding information
print(f"Detected encoding: {result.get('encoding', 'Unknown')}")

# Handle encoding-related exceptions
if result.bozo:
    if isinstance(result.bozo_exception, feedparser.CharacterEncodingOverride):
        print("Encoding was overridden during parsing")
    elif isinstance(result.bozo_exception, feedparser.CharacterEncodingUnknown):
        print("Could not determine character encoding")

# Content should still be usable with UTF-8 conversion
for entry in result.entries:
    # Text content is normalized to Unicode
    title = entry.get('title', '')
    if title:
        print(f"Entry title: {title}")
```
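
When a server declares the wrong charset, one workaround is to fetch the bytes yourself and hand feedparser a corrected header: `parse()` in feedparser 6.x accepts a `response_headers` mapping that is consulted during encoding detection. A sketch, with an illustrative URL and assuming the true encoding is known to be UTF-8:

```python
import urllib.request

import feedparser

# Fetch the raw bytes ourselves (illustrative URL)
raw = urllib.request.urlopen('https://example.com/feed.xml').read()

# Supply a corrected Content-Type so the right charset is used
result = feedparser.parse(
    raw,
    response_headers={'content-type': 'application/xml; charset=utf-8'},
)
print(result.get('encoding'))
```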

### Handling Malformed Unicode

```python
import unicodedata

def clean_text_content(text):
    """
    Clean text content that may have encoding issues.
    """
    if not text:
        return text

    # Normalize Unicode, replacing problematic characters
    text = unicodedata.normalize('NFKC', text)

    # Remove control characters except whitespace
    text = ''.join(char for char in text if unicodedata.category(char)[0] != 'C' or char.isspace())

    return text.strip()

# Apply to feed content
result = feedparser.parse(url)
for entry in result.entries:
    title = clean_text_content(entry.get('title', ''))
    summary = clean_text_content(entry.get('summary', ''))
    # Use cleaned content...
```

## Network Error Handling

### HTTP Error Recovery

```python
import feedparser

def parse_with_fallbacks(urls):
    """
    Try multiple feed URLs with fallback handling.
    """
    for url in urls:
        try:
            result = feedparser.parse(url)

            # Check HTTP status (note: feedparser normally follows redirects
            # itself; result.href holds the final URL after redirection)
            if hasattr(result, 'status'):
                if result.status == 200:
                    return result
                elif result.status in (301, 302):
                    # Follow redirect manually if needed
                    redirect_url = result.headers.get('location')
                    if redirect_url:
                        return feedparser.parse(redirect_url)
                elif result.status == 304:
                    # Not modified - could return cached version
                    pass
                elif result.status >= 400:
                    print(f"HTTP {result.status} for {url}")
                    continue

            # Return result even with minor issues
            if result.entries or result.feed:
                return result

        except Exception as e:
            print(f"Failed to parse {url}: {e}")
            continue

    return None

# Usage with multiple possible feed URLs
feed_urls = [
    'https://example.com/feed.xml',
    'https://example.com/rss.xml',
    'https://example.com/atom.xml',
    'https://example.com/feeds/all.xml',
]

result = parse_with_fallbacks(feed_urls)
```
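
The 304 branch above pairs naturally with feedparser's conditional-request support: `parse()` accepts `etag` and `modified` arguments, and results expose `.etag` and `.modified` when the server supplies them. A sketch of a simple cache built on that (the cache structure here is my own, hypothetical design):

```python
import feedparser

# Hypothetical in-memory cache: url -> (etag, modified, last_result)
_cache = {}

def parse_conditionally(url):
    etag, modified, cached = _cache.get(url, (None, None, None))

    # feedparser sends If-None-Match / If-Modified-Since for us
    result = feedparser.parse(url, etag=etag, modified=modified)

    if getattr(result, 'status', None) == 304 and cached is not None:
        return cached  # Nothing changed; reuse the previous parse

    _cache[url] = (result.get('etag'), result.get('modified'), result)
    return result
```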

### Timeout and Connection Issues

```python
import socket
import urllib.error

import feedparser

# Set a reasonable global timeout (feedparser has no timeout parameter)
socket.setdefaulttimeout(30)

def parse_with_timeout_handling(url):
    """
    Parse feed with proper timeout and connection error handling.

    Note: feedparser usually captures network errors in bozo_exception
    rather than raising, so these handlers are defensive.
    """
    try:
        result = feedparser.parse(url)
        return result

    except socket.timeout:
        print(f"Timeout accessing {url}")
        return None

    except socket.gaierror as e:
        print(f"DNS resolution failed for {url}: {e}")
        return None

    except ConnectionResetError:
        print(f"Connection reset by peer: {url}")
        return None

    except urllib.error.URLError as e:
        if isinstance(e.reason, socket.timeout):
            print(f"Timeout: {url}")
        else:
            print(f"URL error for {url}: {e.reason}")
        return None
```

## Logging and Monitoring

### Comprehensive Error Logging

```python
import logging

import feedparser

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def parse_and_log(url):
    """
    Parse feed with comprehensive logging.
    """
    logger.info(f"Parsing feed: {url}")

    result = feedparser.parse(url)

    # Log parsing results
    if result.bozo:
        logger.warning(f"Bozo feed {url}: {result.bozo_exception}")
    else:
        logger.info(f"Clean parse: {url}")

    # Log HTTP information
    if hasattr(result, 'status'):
        logger.info(f"HTTP {result.status}: {url}")

    # Log content statistics
    logger.info(f"Feed: {len(result.entries)} entries, version: {result.get('version', 'unknown')}")

    # Log encoding information
    if 'encoding' in result:
        logger.info(f"Encoding: {result.encoding}")

    return result
```
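
For the monitoring half, the same signals can be rolled up into counters across a batch of feeds; a minimal sketch (the counter scheme is my own, hypothetical):

```python
from collections import Counter

import feedparser

# Hypothetical health counters aggregated across many feeds
health = Counter()

def record_feed_health(url):
    result = feedparser.parse(url)
    health['total'] += 1
    if result.bozo:
        health['bozo'] += 1
        health[f"bozo:{type(result.bozo_exception).__name__}"] += 1
    if not result.entries:
        health['empty'] += 1
    return result

# After a batch run:
# print(f"{health['bozo']}/{health['total']} feeds reported issues")
```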