# Pagination Support

Pagination utilities for handling large result sets across different TAXII versions with automatic page traversal. The `as_pages` function provides a consistent interface for paginated requests regardless of TAXII version.

## Capabilities

### TAXII 2.1 Pagination

TAXII 2.1 uses limit/next parameters for pagination with server-driven continuation tokens.

```python { .api }
def as_pages(func, per_request=0, *args, **kwargs):
    """
    Generator for TAXII 2.1 endpoints supporting pagination.

    Parameters:
    - func (callable): Collection method supporting pagination (get_objects, get_manifest)
    - per_request (int): Number of items to request per page (0 for server default)
    - *args: Positional arguments to pass to the function
    - **kwargs: Keyword arguments to pass to the function (filters, etc.)

    Yields:
    dict: Response envelope for each page containing objects and metadata

    Note:
    - Automatically handles 'next' tokens from server responses
    - Adjusts per_request if server returns different amount than requested
    - Stops when server indicates no more pages available
    """
```

### TAXII 2.0 Pagination

TAXII 2.0 uses start/per_request parameters with HTTP Range headers for pagination.

```python { .api }
def as_pages(func, start=0, per_request=0, *args, **kwargs):
    """
    Generator for TAXII 2.0 endpoints supporting pagination.

    Parameters:
    - func (callable): Collection method supporting pagination (get_objects, get_manifest)
    - start (int): Starting index for pagination (default: 0)
    - per_request (int): Number of items to request per page (0 for server default)
    - *args: Positional arguments to pass to the function
    - **kwargs: Keyword arguments to pass to the function (filters, etc.)

    Yields:
    dict: Response bundle for each page containing objects and metadata

    Note:
    - Uses HTTP Content-Range headers to determine total available items
    - Automatically calculates next start position
    - Handles server-specific Range header format variations
    """
```

## Usage Examples

### Basic Pagination (TAXII 2.1)

```python
from taxii2client import Collection, as_pages

collection = Collection("https://taxii-server.example.com/taxii2/api1/collections/indicators/")

# Paginate through all objects with default page size
total_objects = 0
for page in as_pages(collection.get_objects):
    objects = page.get('objects', [])
    total_objects += len(objects)
    print(f"Page contains {len(objects)} objects (total so far: {total_objects})")

    # Process objects in this page
    for obj in objects:
        print(f" {obj.get('type')}: {obj.get('id')}")

print(f"Total objects retrieved: {total_objects}")
```

### Custom Page Size

```python
# Request 50 objects per page
for page_num, page in enumerate(as_pages(collection.get_objects, per_request=50), 1):
    objects = page.get('objects', [])
    print(f"Page {page_num}: {len(objects)} objects")

    # Check if this is the last page
    if not page.get('more', False):  # TAXII 2.1
        print("This is the last page")
        break

# Request 100 objects per page with filter
for page in as_pages(collection.get_objects, per_request=100, type="indicator"):
    indicators = page.get('objects', [])
    print(f"Retrieved {len(indicators)} indicators")
```

### Paginated Manifest Retrieval

```python
# Paginate through object manifests instead of full objects
total_manifests = 0
for page in as_pages(collection.get_manifest, per_request=200):
    manifests = page.get('objects', [])  # Manifests are in 'objects' array
    total_manifests += len(manifests)

    print(f"Manifest page: {len(manifests)} objects")
    for manifest in manifests:
        obj_id = manifest.get('id')
        versions = manifest.get('versions', [])
        print(f" {obj_id}: {len(versions)} versions")

print(f"Total objects in collection: {total_manifests}")
```

### Filtered Pagination

```python
from datetime import datetime, timezone

# Paginate with date filter
recent_date = datetime(2023, 1, 1, tzinfo=timezone.utc)
for page in as_pages(collection.get_objects, per_request=100, added_after=recent_date):
    objects = page.get('objects', [])
    print(f"Recent objects page: {len(objects)}")

# Paginate with type filter
for page in as_pages(collection.get_objects, per_request=50, type=["indicator", "malware"]):
    objects = page.get('objects', [])
    indicators = [obj for obj in objects if obj.get('type') == 'indicator']
    malware = [obj for obj in objects if obj.get('type') == 'malware']
    print(f"Page: {len(indicators)} indicators, {len(malware)} malware")

# Paginate with multiple filters
filters = {
    'type': 'indicator',
    'added_after': recent_date
}
for page in as_pages(collection.get_objects, per_request=75, **filters):
    indicators = page.get('objects', [])
    print(f"Recent indicators: {len(indicators)}")
```

### TAXII 2.0 Specific Pagination

```python
from taxii2client.v20 import Collection, as_pages

# For TAXII 2.0, as_pages uses start/per_request parameters
collection = Collection("https://taxii2-server.example.com/api1/collections/indicators/")

# Start from beginning with custom page size
for page in as_pages(collection.get_objects, start=0, per_request=100):
    objects = page.get('objects', [])
    print(f"TAXII 2.0 page: {len(objects)} objects")

# Start from specific offset
for page in as_pages(collection.get_objects, start=500, per_request=50):
    objects = page.get('objects', [])
    print(f"Starting from offset 500: {len(objects)} objects")
```

### Processing Large Collections

```python
import time
from datetime import datetime

# Process very large collection with progress tracking
start_time = datetime.now()
total_processed = 0
page_count = 0

try:
    for page in as_pages(collection.get_objects, per_request=1000):
        page_count += 1
        objects = page.get('objects', [])

        # Process objects in batch
        for obj in objects:
            # Your processing logic here
            process_stix_object(obj)

        total_processed += len(objects)
        elapsed = (datetime.now() - start_time).total_seconds()
        rate = total_processed / elapsed if elapsed > 0 else 0

        print(f"Page {page_count}: Processed {len(objects)} objects")
        print(f" Total: {total_processed} objects in {elapsed:.1f}s ({rate:.1f} obj/s)")

        # Optional: Add delay to avoid overwhelming the server
        time.sleep(0.1)

except KeyboardInterrupt:
    print(f"\nInterrupted after processing {total_processed} objects")
except Exception as e:
    print(f"Error during pagination: {e}")

print(f"Final: Processed {total_processed} objects across {page_count} pages")
```

### Memory-Efficient Processing

```python
# Process large datasets without storing everything in memory
def process_collection_efficiently(collection, batch_size=500):
    """Process all objects in collection without loading everything into memory."""

    processed_count = 0
    error_count = 0

    for page in as_pages(collection.get_objects, per_request=batch_size):
        objects = page.get('objects', [])

        for obj in objects:
            try:
                # Process individual object
                result = analyze_stix_object(obj)
                if result:
                    processed_count += 1
            except Exception as e:
                print(f"Error processing {obj.get('id', 'unknown')}: {e}")
                error_count += 1

        # Clear page from memory
        del objects

        # Periodic status update
        if processed_count % 5000 == 0:
            print(f"Processed: {processed_count}, Errors: {error_count}")

    return processed_count, error_count

# Use the efficient processor
success_count, error_count = process_collection_efficiently(collection, batch_size=1000)
print(f"Processing complete: {success_count} successful, {error_count} errors")
```

### Handling Pagination Errors

```python
from taxii2client.exceptions import TAXIIServiceException

def robust_pagination(collection, page_size=100):
    """Paginate with error handling and retry logic."""

    page_count = 0
    total_objects = 0
    retry_count = 0
    max_retries = 3

    try:
        for page in as_pages(collection.get_objects, per_request=page_size):
            try:
                objects = page.get('objects', [])
                page_count += 1
                total_objects += len(objects)

                print(f"Page {page_count}: {len(objects)} objects")

                # Reset retry count on successful page
                retry_count = 0

            except TAXIIServiceException as e:
                retry_count += 1
                print(f"TAXII error on page {page_count + 1}: {e}")

                if retry_count >= max_retries:
                    print(f"Max retries ({max_retries}) exceeded, stopping")
                    break

                print(f"Retrying page {page_count + 1} (attempt {retry_count + 1})")
                time.sleep(2 ** retry_count)  # Exponential backoff

    except Exception as e:
        print(f"Unexpected error during pagination: {e}")

    return total_objects, page_count

total, pages = robust_pagination(collection, page_size=500)
print(f"Retrieved {total} objects across {pages} pages")
```

### Server-Specific Optimizations

```python
# Adapt page size based on server behavior
def adaptive_pagination(collection, initial_page_size=100):
    """Automatically adjust page size based on server responses."""

    page_size = initial_page_size
    total_objects = 0

    # NOTE(review): per_request is fixed when as_pages() is first called below;
    # reassigning page_size inside the loop does not change the size of
    # subsequent requests made by this generator — it only affects the
    # bookkeeping and messages printed here. Confirm against the as_pages
    # implementation before relying on this pattern.
    for page_num, page in enumerate(as_pages(collection.get_objects, per_request=page_size), 1):
        objects = page.get('objects', [])
        actual_size = len(objects)
        total_objects += actual_size

        print(f"Page {page_num}: requested {page_size}, got {actual_size}")

        # Adjust page size based on server response
        if actual_size < page_size * 0.5 and page_size > 50:
            # Server returned much less than requested, reduce page size
            page_size = max(50, page_size // 2)
            print(f" Reducing page size to {page_size}")
        elif actual_size == page_size and page_size < 1000:
            # Server returned exactly what we asked for, try larger pages
            page_size = min(1000, int(page_size * 1.5))
            print(f" Increasing page size to {page_size}")

    return total_objects

total = adaptive_pagination(collection)
print(f"Total objects retrieved with adaptive pagination: {total}")
```