or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

categories.md, content-extraction.md, index.md, page-navigation.md, wikipedia-wrapper.md

docs/categories.md

0

# Categories

1

2

Work with Wikipedia's category system including page categories and category membership. Categories provide hierarchical organization of Wikipedia content, enabling discovery of related articles and navigation through topical groupings.

3

4

## Capabilities

5

6

### Page Categories

7

8

Access categories that a page belongs to, providing topical classification and related content discovery.

9

10

```python { .api }

11

class WikipediaPage:
    @property
    def categories(self) -> "dict[str, WikipediaPage]":
        """
        Get categories that this page belongs to.

        Returns:
            Dictionary mapping category names to WikipediaPage objects.
            Keys are category names (including "Category:" prefix),
            values are WikipediaPage instances representing category pages.
        """
        # NOTE: the return annotation is quoted because ``WikipediaPage`` is
        # not bound yet while the class body is executing; an unquoted
        # reference fails with NameError when annotations are evaluated eagerly.

22

```

23

24

#### Usage Examples

25

26

```python

27

import wikipediaapi

# Create a client and fetch the article whose categories are inspected below.
wiki = wikipediaapi.Wikipedia('MyApp/1.0', 'en')
page = wiki.page('Machine_learning')

# ``categories`` maps each category name to the page object of that category.
categories = page.categories
print(f"Page belongs to {len(categories)} categories")

# Print the basic attributes of every category page.
for category_name, category_page in categories.items():
    print(f"Category: {category_name}")
    print(f" Namespace: {category_page.namespace}")  # Should be 14 for categories
    print(f" Title: {category_page.title}")

# Split category names into subject and maintenance groups by keyword.
# A name that matches a subject keyword is never counted as maintenance.
subject_categories = []
maintenance_categories = []

for cat_name in categories:
    lowered = cat_name.lower()
    is_subject = any(
        term in lowered
        for term in ('computer science', 'algorithms', 'artificial intelligence')
    )
    is_maintenance = any(
        term in lowered for term in ('articles', 'pages', 'wikipedia')
    )
    if is_subject:
        subject_categories.append(cat_name)
    elif is_maintenance:
        maintenance_categories.append(cat_name)

print(f"Subject categories: {len(subject_categories)}")
for cat in subject_categories:
    print(f" - {cat}")

print(f"Maintenance categories: {len(maintenance_categories)}")

57

```

58

59

### Category Members

60

61

For category pages, access all pages that belong to that category, enabling exploration of topically related content.

62

63

```python { .api }

64

class WikipediaPage:
    @property
    def categorymembers(self) -> "dict[str, WikipediaPage]":
        """
        Get pages that belong to this category (for category pages).

        Returns:
            Dictionary mapping page titles to WikipediaPage objects.
            Keys are page titles, values are WikipediaPage instances.
            Only meaningful for pages in the Category namespace.
        """
        # NOTE: the return annotation is quoted because ``WikipediaPage`` is
        # not bound yet while the class body is executing; an unquoted
        # reference fails with NameError when annotations are evaluated eagerly.

75

```

76

77

#### Usage Examples

78

79

```python

80

# Fetch the category page itself (Category namespace), not an article.
category_page = wiki.page('Category:Machine_learning', ns=wikipediaapi.Namespace.CATEGORY)

if category_page.exists():
    members = category_page.categorymembers
    print(f"Category has {len(members)} members")

    # Sort member titles into buckets according to the member's namespace.
    articles, subcategories, other = [], [], []
    for title, member_page in members.items():
        if member_page.namespace == wikipediaapi.Namespace.MAIN:
            bucket = articles
        elif member_page.namespace == wikipediaapi.Namespace.CATEGORY:
            bucket = subcategories
        else:
            bucket = other
        bucket.append(title)

    print(f"Articles: {len(articles)}")
    print(f"Subcategories: {len(subcategories)}")
    print(f"Other: {len(other)}")

    # List at most ten articles from the category.
    print("\nSome articles in this category:")
    for article_title in articles[:10]:
        print(f" - {article_title}")

    # List at most five subcategories.
    print("\nSubcategories:")
    for subcat_title in subcategories[:5]:
        print(f" - {subcat_title}")

# Second example: go straight to a known category and scan its articles.
physics_cat = wiki.page('Category:Physics', ns=wikipediaapi.Namespace.CATEGORY)
if physics_cat.exists():
    physics_members = physics_cat.categorymembers
    print(f"Physics category has {len(physics_members)} members")

    for title, page in physics_members.items():
        if page.namespace != wikipediaapi.Namespace.MAIN:
            continue
        # Could check page.summary or other properties to assess importance;
        # a short title is used here as a simple heuristic for main topics.
        if len(title) < 30:
            print(f"Physics article: {title}")

126

```

127

128

### Category Hierarchy Navigation

129

130

Navigate through Wikipedia's category hierarchy to explore related topics and subcategories.

131

132

#### Usage Examples

133

134

```python

135

def explore_category_hierarchy(category_name, max_depth=2, current_depth=0, wiki=None):
    """Recursively print a category's articles and subcategories.

    Args:
        category_name: Category title, e.g. ``'Category:Computer_science'``.
        max_depth: Recursion stops as soon as ``current_depth`` reaches this
            value, so ``max_depth`` levels are printed at most.
        current_depth: Depth of the current call; also controls the output
            indentation. External callers normally leave it at 0.
        wiki: Optional ``wikipediaapi.Wikipedia`` client to reuse. When
            omitted, one client is created by the outermost call and handed
            down to every recursive call instead of building a new client
            per visited category.

    Returns:
        None. Results are written to standard output.
    """
    if current_depth >= max_depth:
        return

    # Create the client once; recursive calls receive it through ``wiki``.
    if wiki is None:
        wiki = wikipediaapi.Wikipedia('MyApp/1.0', 'en')

    category_page = wiki.page(category_name, ns=wikipediaapi.Namespace.CATEGORY)
    if not category_page.exists():
        return

    indent = " " * current_depth
    print(f"{indent}Category: {category_name}")

    # Members in namespaces other than MAIN and CATEGORY are ignored.
    articles = []
    subcategories = []
    for title, member_page in category_page.categorymembers.items():
        if member_page.namespace == wikipediaapi.Namespace.MAIN:
            articles.append(title)
        elif member_page.namespace == wikipediaapi.Namespace.CATEGORY:
            subcategories.append(title)

    print(f"{indent} Articles: {len(articles)}")
    print(f"{indent} Subcategories: {len(subcategories)}")

    # Show some articles
    for article in articles[:3]:
        print(f"{indent} - {article}")

    # Recurse into subcategories; limited to three to prevent too much output.
    for subcat in subcategories[:3]:
        explore_category_hierarchy(subcat, max_depth, current_depth + 1, wiki)


# Explore computer science hierarchy
explore_category_hierarchy('Category:Computer_science', max_depth=3)

172

```

173

174

### Category-Based Content Discovery

175

176

Use categories to discover related content and analyze topical relationships.

177

178

#### Usage Examples

179

180

```python

181

def find_related_articles_via_categories(page_title, min_shared_categories=2):
    """Find articles related to a page via shared categories.

    Args:
        page_title: Title of the page to start from. Underscores and spaces
            are both accepted (``'Neural_network'`` or ``'Neural network'``).
        min_shared_categories: Minimum number of the page's categories an
            article must also belong to in order to be returned.

    Returns:
        A list of ``(article_title, shared_count, shared_categories)`` tuples
        sorted by ``shared_count`` in descending order, where
        ``shared_categories`` is a set of category names. An empty list is
        returned when the page does not exist.
    """
    wiki = wikipediaapi.Wikipedia('MyApp/1.0', 'en')
    page = wiki.page(page_title)

    if not page.exists():
        return []

    # Titles under which the starting page itself may appear as a category
    # member. The raw argument alone is not enough: callers may pass
    # underscores while member titles use spaces, which previously let the
    # page be reported as related to itself.
    # NOTE(review): page.title is included in case the library exposes a
    # normalized title after the lookup -- confirm against the library docs.
    own_titles = {page_title, page_title.replace('_', ' '), page.title}

    # Maps article title -> set of this page's categories it also belongs to.
    related_articles = {}

    for category_name, category_page in page.categories.items():
        if not category_page.exists():
            continue
        for member_title, member_page in category_page.categorymembers.items():
            if member_page.namespace != wikipediaapi.Namespace.MAIN:
                continue
            if member_title in own_titles:
                continue
            related_articles.setdefault(member_title, set()).add(category_name)

    # Filter by minimum shared categories
    highly_related = [
        (article_title, len(shared_cats), shared_cats)
        for article_title, shared_cats in related_articles.items()
        if len(shared_cats) >= min_shared_categories
    ]

    # Sort by number of shared categories
    highly_related.sort(key=lambda entry: entry[1], reverse=True)
    return highly_related

213

214

def analyze_category_overlap(page1_title, page2_title):
    """Compare the category sets of two pages.

    Returns ``None`` when either page does not exist. Otherwise returns a
    dict with the sorted shared categories, the sorted categories unique to
    each page, and ``similarity_ratio``: shared count divided by the size of
    the union of both category sets (divisor 1 when the union is empty).
    """
    wiki = wikipediaapi.Wikipedia('MyApp/1.0', 'en')

    first = wiki.page(page1_title)
    second = wiki.page(page2_title)

    # Short-circuits: the second page is only checked if the first exists.
    if not first.exists() or not second.exists():
        return None

    first_cats = set(first.categories)
    second_cats = set(second.categories)

    common = first_cats & second_cats
    combined = first_cats | second_cats
    divisor = len(combined) if combined else 1

    return {
        'shared_categories': sorted(common),
        'only_in_first': sorted(first_cats - second_cats),
        'only_in_second': sorted(second_cats - first_cats),
        'similarity_ratio': len(common) / divisor,
    }

237

238

# Find articles related to "Neural network"
related = find_related_articles_via_categories('Neural_network', min_shared_categories=2)
print(f"Found {len(related)} highly related articles:")
for article, shared_count, categories in related[:10]:
    print(f" {article} (shares {shared_count} categories)")
    # Show first 3 shared categories
    preview = list(categories)[:3]
    for cat in preview:
        print(f" - {cat}")

# Compare two pages via categories
comparison = analyze_category_overlap('Machine_learning', 'Deep_learning')
if comparison:
    shared_names = comparison['shared_categories']
    print(f"Similarity ratio: {comparison['similarity_ratio']:.2f}")
    print(f"Shared categories: {len(shared_names)}")
    for cat in shared_names[:5]:
        print(f" - {cat}")

253

```

254

255

### Category Filtering and Analysis

256

257

Advanced category filtering and analysis for content organization.

258

259

#### Usage Examples

260

261

```python

262

def filter_categories_by_type(page_title):
    """Group a page's category names into coarse types by keyword.

    Each category name is lower-cased and tested against keyword lists in a
    fixed priority order (maintenance, geographic, temporal, subject); the
    first list with a keyword contained in the name decides the group, and
    names matching none go to ``'other'``.

    Returns:
        A dict with the keys ``'subject'``, ``'geographic'``, ``'temporal'``,
        ``'maintenance'`` and ``'other'``, each mapping to a list of category
        names, or ``None`` when the page does not exist.
    """
    wiki = wikipediaapi.Wikipedia('MyApp/1.0', 'en')
    page = wiki.page(page_title)

    if not page.exists():
        return None

    # (group, keywords) pairs in priority order: the first match wins.
    rules = (
        ('maintenance', ('articles', 'pages', 'wikipedia', 'cleanup')),
        ('geographic', ('country', 'city', 'region', 'american', 'european')),
        ('temporal', ('century', 'year', 'decade', 'era')),
        ('subject', ('science', 'mathematics', 'computer', 'physics')),
    )

    categorized = {
        'subject': [],      # Subject matter categories
        'geographic': [],   # Geographic categories
        'temporal': [],     # Time-based categories
        'maintenance': [],  # Wikipedia maintenance categories
        'other': [],
    }

    for cat_name in page.categories:
        lowered = cat_name.lower()
        group = next(
            (label for label, keywords in rules
             if any(keyword in lowered for keyword in keywords)),
            'other',
        )
        categorized[group].append(cat_name)

    return categorized

293

294

def get_category_statistics(category_name):
    """Count the members of a category, broken down by namespace.

    Returns:
        ``None`` when the category page does not exist. Otherwise a dict with
        ``'total_members'``, the counters ``'articles'``, ``'subcategories'``
        and ``'other'``, and ``'member_types'``, which maps
        ``"Namespace_<ns>"`` labels to the number of members in that
        namespace.
    """
    wiki = wikipediaapi.Wikipedia('MyApp/1.0', 'en')
    category_page = wiki.page(category_name, ns=wikipediaapi.Namespace.CATEGORY)

    if not category_page.exists():
        return None

    members = category_page.categorymembers
    stats = {
        'total_members': len(members),
        'articles': 0,
        'subcategories': 0,
        'other': 0,
        'member_types': {},
    }
    per_namespace = stats['member_types']

    for member_page in members.values():
        ns = member_page.namespace

        # Pick the coarse counter this member contributes to.
        if ns == wikipediaapi.Namespace.MAIN:
            counter_key = 'articles'
        elif ns == wikipediaapi.Namespace.CATEGORY:
            counter_key = 'subcategories'
        else:
            counter_key = 'other'
        stats[counter_key] += 1

        # Always record the exact namespace as well.
        label = f"Namespace_{ns}"
        per_namespace[label] = per_namespace.get(label, 0) + 1

    return stats

324

325

# Analyze page categories
category_analysis = filter_categories_by_type('Quantum_computing')
if category_analysis:
    for cat_type, cats in category_analysis.items():
        # Skip groups that received no categories.
        if not cats:
            continue
        print(f"{cat_type.title()} categories ({len(cats)}):")
        for cat in cats[:3]:  # Show first 3
            print(f" - {cat}")

# Get category statistics
stats = get_category_statistics('Category:Artificial_intelligence')
if stats:
    print("Category statistics:")
    for label, key in (
        ('Total members', 'total_members'),
        ('Articles', 'articles'),
        ('Subcategories', 'subcategories'),
        ('Other', 'other'),
    ):
        print(f" {label}: {stats[key]}")

342

```