0
# Categories
1
2
Work with Wikipedia's category system, including page categories and category membership. Categories provide hierarchical organization of Wikipedia content, enabling discovery of related articles and navigation through topical groupings.
3
4
## Capabilities
5
6
### Page Categories
7
8
Access categories that a page belongs to, providing topical classification and related content discovery.
9
10
```python { .api }
11
class WikipediaPage:
    @property
    def categories(self) -> dict[str, "WikipediaPage"]:
        """
        Get categories that this page belongs to.

        Returns:
            Dictionary mapping category names to WikipediaPage objects.
            Keys are category names (including "Category:" prefix),
            values are WikipediaPage instances representing category pages.
        """
        # The class name is quoted in the annotation because WikipediaPage is
        # not yet bound while its own body is being evaluated.
22
```
23
24
#### Usage Examples
25
26
```python
27
import wikipediaapi
28
29
wiki = wikipediaapi.Wikipedia('MyApp/1.0', 'en')
page = wiki.page('Machine_learning')

# Fetch the mapping of category name -> category page for this article
categories = page.categories
print(f"Page belongs to {len(categories)} categories")

# Print the details of every category
for category_name, category_page in categories.items():
    print(f"Category: {category_name}")
    print(f" Namespace: {category_page.namespace}")  # Should be 14 for categories
    print(f" Title: {category_page.title}")

# Sort category names into subject vs. maintenance buckets by keyword.
# Matching is a case-insensitive substring test; subject keywords are
# checked first, so a name is never placed in both buckets.
subject_keywords = ['computer science', 'algorithms', 'artificial intelligence']
maintenance_keywords = ['articles', 'pages', 'wikipedia']

subject_categories = []
maintenance_categories = []

for cat_name in categories:
    lowered = cat_name.lower()
    if any(keyword in lowered for keyword in subject_keywords):
        subject_categories.append(cat_name)
        continue
    if any(keyword in lowered for keyword in maintenance_keywords):
        maintenance_categories.append(cat_name)

print(f"Subject categories: {len(subject_categories)}")
for cat in subject_categories:
    print(f" - {cat}")

print(f"Maintenance categories: {len(maintenance_categories)}")
57
```
58
59
### Category Members
60
61
For category pages, access all pages that belong to that category, enabling exploration of topically related content.
62
63
```python { .api }
64
class WikipediaPage:
    @property
    def categorymembers(self) -> dict[str, "WikipediaPage"]:
        """
        Get pages that belong to this category (for category pages).

        Returns:
            Dictionary mapping page titles to WikipediaPage objects.
            Keys are page titles, values are WikipediaPage instances.
            Only meaningful for pages in the Category namespace.
        """
        # The class name is quoted in the annotation because WikipediaPage is
        # not yet bound while its own body is being evaluated.
75
```
76
77
#### Usage Examples
78
79
```python
80
# Look up a category page explicitly in the Category namespace
category_page = wiki.page('Category:Machine_learning', ns=wikipediaapi.Namespace.CATEGORY)

if category_page.exists():
    members = category_page.categorymembers
    print(f"Category has {len(members)} members")

    # Split the members into articles, subcategories and everything else
    articles, subcategories, other = [], [], []

    for title, member_page in members.items():
        member_ns = member_page.namespace
        if member_ns == wikipediaapi.Namespace.MAIN:
            bucket = articles
        elif member_ns == wikipediaapi.Namespace.CATEGORY:
            bucket = subcategories
        else:
            bucket = other
        bucket.append(title)

    print(f"Articles: {len(articles)}")
    print(f"Subcategories: {len(subcategories)}")
    print(f"Other: {len(other)}")

    # List at most ten of the articles
    print("\nSome articles in this category:")
    for article_title in articles[:10]:
        print(f" - {article_title}")

    # List at most five of the subcategories
    print("\nSubcategories:")
    for subcat_title in subcategories[:5]:
        print(f" - {subcat_title}")

# Direct category exploration
physics_cat = wiki.page('Category:Physics', ns=wikipediaapi.Namespace.CATEGORY)
if physics_cat.exists():
    physics_members = physics_cat.categorymembers
    print(f"Physics category has {len(physics_members)} members")

    # Report main-namespace articles with short titles.
    # page.summary or other properties could be checked to assess importance;
    # title length is only a simple heuristic for main topics.
    for title, page in physics_members.items():
        if page.namespace != wikipediaapi.Namespace.MAIN:
            continue
        if len(title) < 30:
            print(f"Physics article: {title}")
126
```
127
128
### Category Hierarchy Navigation
129
130
Navigate through Wikipedia's category hierarchy to explore related topics and subcategories.
131
132
#### Usage Examples
133
134
```python
135
def explore_category_hierarchy(category_name, max_depth=2, current_depth=0, wiki=None):
    """Recursively print a category's articles and subcategories.

    Args:
        category_name: Title of the category page to explore,
            e.g. 'Category:Computer_science'.
        max_depth: Recursion stops once ``current_depth`` reaches this value.
        current_depth: Depth of the current call; also controls how far the
            printed lines are indented.
        wiki: Optional ``wikipediaapi.Wikipedia`` client to reuse. When
            omitted, one client is created by the outermost call and passed
            down to every recursive call instead of building a new one per
            category.

    Returns:
        None. Results are printed.
    """
    if current_depth >= max_depth:
        return

    if wiki is None:
        wiki = wikipediaapi.Wikipedia('MyApp/1.0', 'en')

    category_page = wiki.page(category_name, ns=wikipediaapi.Namespace.CATEGORY)
    if not category_page.exists():
        return

    indent = " " * current_depth
    print(f"{indent}Category: {category_name}")

    # Members in namespaces other than MAIN and CATEGORY are ignored.
    members = category_page.categorymembers
    articles = [
        title for title, member_page in members.items()
        if member_page.namespace == wikipediaapi.Namespace.MAIN
    ]
    subcategories = [
        title for title, member_page in members.items()
        if member_page.namespace == wikipediaapi.Namespace.CATEGORY
    ]

    print(f"{indent} Articles: {len(articles)}")
    print(f"{indent} Subcategories: {len(subcategories)}")

    # Show some articles
    for article in articles[:3]:
        print(f"{indent} - {article}")

    # Recurse into only the first three subcategories to keep the output short
    for subcat in subcategories[:3]:
        explore_category_hierarchy(subcat, max_depth, current_depth + 1, wiki)
169
170
# Walk the computer science category tree, stopping at depth 3
explore_category_hierarchy(category_name='Category:Computer_science', max_depth=3)
172
```
173
174
### Category-Based Content Discovery
175
176
Use categories to discover related content and analyze topical relationships.
177
178
#### Usage Examples
179
180
```python
181
def find_related_articles_via_categories(page_title, min_shared_categories=2):
    """Find articles that share categories with the given page.

    Args:
        page_title: Title of the page to start from. Underscores and spaces
            are treated as equivalent when excluding the page itself.
        min_shared_categories: Minimum number of the page's categories an
            article must also belong to in order to be returned.

    Returns:
        A list of ``(article_title, shared_count, shared_categories)`` tuples
        sorted by ``shared_count`` in descending order, where
        ``shared_categories`` is a set of category names. An empty list is
        returned when the page does not exist.
    """
    wiki = wikipediaapi.Wikipedia('MyApp/1.0', 'en')
    page = wiki.page(page_title)

    if not page.exists():
        return []

    # Normalize underscores to spaces on both sides of the comparison so that
    # 'Neural_network' and 'Neural network' are recognized as the same page
    # and the page is not reported as related to itself.
    own_title = page_title.replace('_', ' ')

    # article title -> set of this page's categories the article also belongs to
    related_articles = {}

    for category_name, category_page in page.categories.items():
        if not category_page.exists():
            continue
        for member_title, member_page in category_page.categorymembers.items():
            if member_page.namespace != wikipediaapi.Namespace.MAIN:
                continue
            if member_title.replace('_', ' ') == own_title:
                continue
            related_articles.setdefault(member_title, set()).add(category_name)

    # Keep only articles that share enough categories
    highly_related = [
        (article_title, len(shared_cats), shared_cats)
        for article_title, shared_cats in related_articles.items()
        if len(shared_cats) >= min_shared_categories
    ]

    # Most shared categories first
    highly_related.sort(key=lambda entry: entry[1], reverse=True)
    return highly_related
213
214
def analyze_category_overlap(page1_title, page2_title):
    """Compare the category sets of two pages.

    Args:
        page1_title: Title of the first page.
        page2_title: Title of the second page.

    Returns:
        None if either page does not exist. Otherwise a dict with the sorted
        lists 'shared_categories', 'only_in_first' and 'only_in_second', plus
        'similarity_ratio': the number of shared categories divided by the
        number of distinct categories across both pages (the divisor falls
        back to 1 when neither page has any category).
    """
    wiki = wikipediaapi.Wikipedia('MyApp/1.0', 'en')

    first_page = wiki.page(page1_title)
    second_page = wiki.page(page2_title)

    if not first_page.exists() or not second_page.exists():
        return None

    first_names = set(first_page.categories)
    second_names = set(second_page.categories)

    common = first_names & second_names
    combined = first_names | second_names
    divisor = len(combined) if combined else 1

    return {
        'shared_categories': sorted(common),
        'only_in_first': sorted(first_names - second_names),
        'only_in_second': sorted(second_names - first_names),
        'similarity_ratio': len(common) / divisor,
    }
237
238
# Find articles related to "Neural network"
related = find_related_articles_via_categories('Neural_network', min_shared_categories=2)
print(f"Found {len(related)} highly related articles:")
for article, shared_count, shared_cats in related[:10]:
    print(f" {article} (shares {shared_count} categories)")
    # shared_cats is a set, so sort it to show the same three names on every run
    for cat in sorted(shared_cats)[:3]:
        print(f" - {cat}")

# Compare two pages via categories
comparison = analyze_category_overlap('Machine_learning', 'Deep_learning')
if comparison:
    print(f"Similarity ratio: {comparison['similarity_ratio']:.2f}")
    print(f"Shared categories: {len(comparison['shared_categories'])}")
    # Show at most five of the shared categories
    for cat in comparison['shared_categories'][:5]:
        print(f" - {cat}")
253
```
254
255
### Category Filtering and Analysis
256
257
Advanced category filtering and analysis for content organization.
258
259
#### Usage Examples
260
261
```python
262
def filter_categories_by_type(page_title):
    """Group a page's categories into coarse types by keyword.

    Each category name is lower-cased and tested against the keyword groups
    in a fixed order (maintenance, geographic, temporal, subject); the first
    group containing a matching keyword wins, and names matching no group go
    to 'other'. Matching is a plain substring test, so a short keyword such
    as 'era' also matches inside longer words.

    Args:
        page_title: Title of the page whose categories are grouped.

    Returns:
        None if the page does not exist, otherwise a dict with the keys
        'subject', 'geographic', 'temporal', 'maintenance' and 'other', each
        mapping to a list of category names.
    """
    wiki = wikipediaapi.Wikipedia('MyApp/1.0', 'en')
    page = wiki.page(page_title)

    if not page.exists():
        return None

    # (bucket, keywords) pairs in priority order
    rules = (
        ('maintenance', ('articles', 'pages', 'wikipedia', 'cleanup')),
        ('geographic', ('country', 'city', 'region', 'american', 'european')),
        ('temporal', ('century', 'year', 'decade', 'era')),
        ('subject', ('science', 'mathematics', 'computer', 'physics')),
    )

    categorized = {
        'subject': [],      # Subject matter categories
        'geographic': [],   # Geographic categories
        'temporal': [],     # Time-based categories
        'maintenance': [],  # Wikipedia maintenance categories
        'other': [],
    }

    for cat_name in page.categories:
        lowered = cat_name.lower()
        bucket = next(
            (
                kind
                for kind, keywords in rules
                if any(keyword in lowered for keyword in keywords)
            ),
            'other',
        )
        categorized[bucket].append(cat_name)

    return categorized
293
294
def get_category_statistics(category_name):
    """Count a category's members by namespace.

    Args:
        category_name: Title of the category page,
            e.g. 'Category:Artificial_intelligence'.

    Returns:
        None if the category page does not exist, otherwise a dict with
        'total_members', 'articles', 'subcategories', 'other', and
        'member_types' (a dict mapping 'Namespace_<ns>' labels to counts).
    """
    wiki = wikipediaapi.Wikipedia('MyApp/1.0', 'en')
    category_page = wiki.page(category_name, ns=wikipediaapi.Namespace.CATEGORY)

    if not category_page.exists():
        return None

    members = category_page.categorymembers

    article_count = 0
    subcategory_count = 0
    other_count = 0
    member_types = {}

    for member_page in members.values():
        ns = member_page.namespace
        if ns == wikipediaapi.Namespace.MAIN:
            article_count += 1
        elif ns == wikipediaapi.Namespace.CATEGORY:
            subcategory_count += 1
        else:
            other_count += 1

        # Per-namespace tally, kept alongside the three coarse counters
        label = f"Namespace_{ns}"
        member_types[label] = member_types.get(label, 0) + 1

    return {
        'total_members': len(members),
        'articles': article_count,
        'subcategories': subcategory_count,
        'other': other_count,
        'member_types': member_types,
    }
324
325
# Group the categories of one page by type and print a sample of each group
category_analysis = filter_categories_by_type('Quantum_computing')
if category_analysis:
    for cat_type, cats in category_analysis.items():
        if not cats:
            continue  # skip empty groups
        print(f"{cat_type.title()} categories ({len(cats)}):")
        for cat in cats[:3]:  # Show first 3
            print(f" - {cat}")

# Print member counts for one category
stats = get_category_statistics('Category:Artificial_intelligence')
if stats:
    print(f"Category statistics:")
    print(f" Total members: {stats['total_members']}")
    print(f" Articles: {stats['articles']}")
    print(f" Subcategories: {stats['subcategories']}")
    print(f" Other: {stats['other']}")
342
```