or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

api-root-management.md · authentication-connection.md · collection-operations.md · index.md · pagination-support.md · server-discovery.md · status-monitoring.md

docs/pagination-support.md

# Pagination Support

Pagination utilities for handling large result sets across different TAXII versions with automatic page traversal. The `as_pages` function provides a consistent interface for paginated requests regardless of TAXII version.

## Capabilities

### TAXII 2.1 Pagination

TAXII 2.1 uses limit/next parameters for pagination with server-driven continuation tokens.

```python { .api }
def as_pages(func, per_request=0, *args, **kwargs):
    """
    Generator for TAXII 2.1 endpoints supporting pagination.

    Parameters:
    - func (callable): Collection method supporting pagination (get_objects, get_manifest)
    - per_request (int): Number of items to request per page (0 for server default)
    - *args: Positional arguments to pass to the function
    - **kwargs: Keyword arguments to pass to the function (filters, etc.)

    Yields:
    dict: Response envelope for each page containing objects and metadata

    Note:
    - Automatically handles 'next' tokens from server responses
    - Adjusts per_request if server returns different amount than requested
    - Stops when server indicates no more pages available
    """
```

### TAXII 2.0 Pagination

TAXII 2.0 uses start/per_request parameters with HTTP Range headers for pagination.

```python { .api }
def as_pages(func, start=0, per_request=0, *args, **kwargs):
    """
    Generator for TAXII 2.0 endpoints supporting pagination.

    Parameters:
    - func (callable): Collection method supporting pagination (get_objects, get_manifest)
    - start (int): Starting index for pagination (default: 0)
    - per_request (int): Number of items to request per page (0 for server default)
    - *args: Positional arguments to pass to the function
    - **kwargs: Keyword arguments to pass to the function (filters, etc.)

    Yields:
    dict: Response bundle for each page containing objects and metadata

    Note:
    - Uses HTTP Content-Range headers to determine total available items
    - Automatically calculates next start position
    - Handles server-specific Range header format variations
    """
```

## Usage Examples

### Basic Pagination (TAXII 2.1)

```python
from taxii2client import Collection, as_pages

collection = Collection("https://taxii-server.example.com/taxii2/api1/collections/indicators/")

# Paginate through all objects with default page size
total_objects = 0
for page in as_pages(collection.get_objects):
    objects = page.get('objects', [])
    total_objects += len(objects)
    print(f"Page contains {len(objects)} objects (total so far: {total_objects})")

    # Process objects in this page
    for obj in objects:
        print(f" {obj.get('type')}: {obj.get('id')}")

print(f"Total objects retrieved: {total_objects}")
```

### Custom Page Size

```python
# Request 50 objects per page
for page_num, page in enumerate(as_pages(collection.get_objects, per_request=50), 1):
    objects = page.get('objects', [])
    print(f"Page {page_num}: {len(objects)} objects")

    # Check if this is the last page
    if not page.get('more', False):  # TAXII 2.1
        print("This is the last page")
        break

# Request 100 objects per page with filter
for page in as_pages(collection.get_objects, per_request=100, type="indicator"):
    indicators = page.get('objects', [])
    print(f"Retrieved {len(indicators)} indicators")
```

### Paginated Manifest Retrieval

```python
# Paginate through object manifests instead of full objects
total_manifests = 0
for page in as_pages(collection.get_manifest, per_request=200):
    manifests = page.get('objects', [])  # Manifests are in 'objects' array
    total_manifests += len(manifests)

    print(f"Manifest page: {len(manifests)} objects")
    for manifest in manifests:
        obj_id = manifest.get('id')
        versions = manifest.get('versions', [])
        print(f" {obj_id}: {len(versions)} versions")

print(f"Total objects in collection: {total_manifests}")
```

### Filtered Pagination

```python
from datetime import datetime, timezone

# Paginate with date filter
recent_date = datetime(2023, 1, 1, tzinfo=timezone.utc)
for page in as_pages(collection.get_objects, per_request=100, added_after=recent_date):
    objects = page.get('objects', [])
    print(f"Recent objects page: {len(objects)}")

# Paginate with type filter
for page in as_pages(collection.get_objects, per_request=50, type=["indicator", "malware"]):
    objects = page.get('objects', [])
    indicators = [obj for obj in objects if obj.get('type') == 'indicator']
    malware = [obj for obj in objects if obj.get('type') == 'malware']
    print(f"Page: {len(indicators)} indicators, {len(malware)} malware")

# Paginate with multiple filters
filters = {
    'type': 'indicator',
    'added_after': recent_date
}
for page in as_pages(collection.get_objects, per_request=75, **filters):
    indicators = page.get('objects', [])
    print(f"Recent indicators: {len(indicators)}")
```

### TAXII 2.0 Specific Pagination

```python
from taxii2client.v20 import Collection, as_pages

# For TAXII 2.0, as_pages uses start/per_request parameters
collection = Collection("https://taxii2-server.example.com/api1/collections/indicators/")

# Start from beginning with custom page size
for page in as_pages(collection.get_objects, start=0, per_request=100):
    objects = page.get('objects', [])
    print(f"TAXII 2.0 page: {len(objects)} objects")

# Start from specific offset
for page in as_pages(collection.get_objects, start=500, per_request=50):
    objects = page.get('objects', [])
    print(f"Starting from offset 500: {len(objects)} objects")
```

### Processing Large Collections

```python
import time
from datetime import datetime

# Process very large collection with progress tracking
start_time = datetime.now()
total_processed = 0
page_count = 0

try:
    for page in as_pages(collection.get_objects, per_request=1000):
        page_count += 1
        objects = page.get('objects', [])

        # Process objects in batch
        for obj in objects:
            # Your processing logic here
            process_stix_object(obj)

        total_processed += len(objects)
        elapsed = (datetime.now() - start_time).total_seconds()
        rate = total_processed / elapsed if elapsed > 0 else 0

        print(f"Page {page_count}: Processed {len(objects)} objects")
        print(f" Total: {total_processed} objects in {elapsed:.1f}s ({rate:.1f} obj/s)")

        # Optional: Add delay to avoid overwhelming the server
        time.sleep(0.1)

except KeyboardInterrupt:
    print(f"\nInterrupted after processing {total_processed} objects")
except Exception as e:
    print(f"Error during pagination: {e}")

print(f"Final: Processed {total_processed} objects across {page_count} pages")
```

### Memory-Efficient Processing

```python
# Process large datasets without storing everything in memory
def process_collection_efficiently(collection, batch_size=500):
    """Process all objects in collection without loading everything into memory."""

    processed_count = 0
    error_count = 0

    for page in as_pages(collection.get_objects, per_request=batch_size):
        objects = page.get('objects', [])

        for obj in objects:
            try:
                # Process individual object
                result = analyze_stix_object(obj)
                if result:
                    processed_count += 1
            except Exception as e:
                print(f"Error processing {obj.get('id', 'unknown')}: {e}")
                error_count += 1

        # Clear page from memory
        del objects

        # Periodic status update
        if processed_count % 5000 == 0:
            print(f"Processed: {processed_count}, Errors: {error_count}")

    return processed_count, error_count

# Use the efficient processor
success_count, error_count = process_collection_efficiently(collection, batch_size=1000)
print(f"Processing complete: {success_count} successful, {error_count} errors")
```

### Handling Pagination Errors

```python
from taxii2client.exceptions import TAXIIServiceException

def robust_pagination(collection, page_size=100):
    """Paginate with error handling and retry logic."""

    page_count = 0
    total_objects = 0
    retry_count = 0
    max_retries = 3

    try:
        for page in as_pages(collection.get_objects, per_request=page_size):
            try:
                objects = page.get('objects', [])
                page_count += 1
                total_objects += len(objects)

                print(f"Page {page_count}: {len(objects)} objects")

                # Reset retry count on successful page
                retry_count = 0

            except TAXIIServiceException as e:
                retry_count += 1
                print(f"TAXII error on page {page_count + 1}: {e}")

                if retry_count >= max_retries:
                    print(f"Max retries ({max_retries}) exceeded, stopping")
                    break

                print(f"Retrying page {page_count + 1} (attempt {retry_count + 1})")
                time.sleep(2 ** retry_count)  # Exponential backoff

    except Exception as e:
        print(f"Unexpected error during pagination: {e}")

    return total_objects, page_count

total, pages = robust_pagination(collection, page_size=500)
print(f"Retrieved {total} objects across {pages} pages")
```

### Server-Specific Optimizations

```python
# Adapt page size based on server behavior
def adaptive_pagination(collection, initial_page_size=100):
    """Automatically adjust page size based on server responses."""

    page_size = initial_page_size
    total_objects = 0

    for page_num, page in enumerate(as_pages(collection.get_objects, per_request=page_size), 1):
        objects = page.get('objects', [])
        actual_size = len(objects)
        total_objects += actual_size

        print(f"Page {page_num}: requested {page_size}, got {actual_size}")

        # Adjust page size based on server response
        if actual_size < page_size * 0.5 and page_size > 50:
            # Server returned much less than requested, reduce page size
            page_size = max(50, page_size // 2)
            print(f" Reducing page size to {page_size}")
        elif actual_size == page_size and page_size < 1000:
            # Server returned exactly what we asked for, try larger pages
            page_size = min(1000, int(page_size * 1.5))
            print(f" Increasing page size to {page_size}")

    return total_objects

total = adaptive_pagination(collection)
print(f"Total objects retrieved with adaptive pagination: {total}")
```

Note: reassigning `page_size` inside the loop only changes the locally tracked value — the `as_pages` generator was created once with the initial `per_request`, so already-started pagination continues at the original page size. To actually apply a new page size to the server, stop iterating and start a fresh `as_pages` call with the adjusted value.