Python wrapper for Wikipedia's API that provides easy access to page content, sections, links, categories, and translations
Quality: Pending — best-practices review has not yet been performed.
Impact: Pending — no eval scenarios have been run.
Work with Wikipedia's category system including page categories and category membership. Categories provide hierarchical organization of Wikipedia content, enabling discovery of related articles and navigation through topical groupings.
Access categories that a page belongs to, providing topical classification and related content discovery.
class WikipediaPage:
    @property
    def categories(self) -> dict[str, WikipediaPage]:
        """
        Get categories that this page belongs to.

        Returns:
            Dictionary mapping category names to WikipediaPage objects.
            Keys are category names (including "Category:" prefix),
            values are WikipediaPage instances representing category pages.
        """


import wikipediaapi

wiki = wikipediaapi.Wikipedia('MyApp/1.0', 'en')
page = wiki.page('Machine_learning')

# Get all categories for the page
categories = page.categories
print(f"Page belongs to {len(categories)} categories")

# Browse categories
for category_name, category_page in categories.items():
    print(f"Category: {category_name}")
    print(f"  Namespace: {category_page.namespace}")  # Should be 14 for categories
    print(f"  Title: {category_page.title}")

# Find specific types of categories via simple keyword heuristics
subject_categories = []
maintenance_categories = []
for cat_name, cat_page in categories.items():
    if any(keyword in cat_name.lower() for keyword in ['computer science', 'algorithms', 'artificial intelligence']):
        subject_categories.append(cat_name)
    elif any(keyword in cat_name.lower() for keyword in ['articles', 'pages', 'wikipedia']):
        maintenance_categories.append(cat_name)

print(f"Subject categories: {len(subject_categories)}")
for cat in subject_categories:
    print(f"  - {cat}")
print(f"Maintenance categories: {len(maintenance_categories)}")

For category pages, access all pages that belong to that category, enabling exploration of topically related content.
class WikipediaPage:
@property
def categorymembers(self) -> dict[str, WikipediaPage]:
"""
Get pages that belong to this category (for category pages).
Returns:
Dictionary mapping page titles to WikipediaPage objects.
Keys are page titles, values are WikipediaPage instances.
Only meaningful for pages in the Category namespace.
"""# Work with a category page
category_page = wiki.page('Category:Machine_learning', ns=wikipediaapi.Namespace.CATEGORY)
if category_page.exists():
members = category_page.categorymembers
print(f"Category has {len(members)} members")
# Analyze category members by namespace
articles = []
subcategories = []
other = []
for title, member_page in members.items():
if member_page.namespace == wikipediaapi.Namespace.MAIN:
articles.append(title)
elif member_page.namespace == wikipediaapi.Namespace.CATEGORY:
subcategories.append(title)
else:
other.append(title)
print(f"Articles: {len(articles)}")
print(f"Subcategories: {len(subcategories)}")
print(f"Other: {len(other)}")
# Show some articles in this category
print("\nSome articles in this category:")
for article_title in articles[:10]:
print(f" - {article_title}")
# Show subcategories
print("\nSubcategories:")
for subcat_title in subcategories[:5]:
print(f" - {subcat_title}")
# Direct category exploration
physics_cat = wiki.page('Category:Physics', ns=wikipediaapi.Namespace.CATEGORY)
if physics_cat.exists():
physics_members = physics_cat.categorymembers
print(f"Physics category has {len(physics_members)} members")
# Find notable physics articles
for title, page in physics_members.items():
if page.namespace == wikipediaapi.Namespace.MAIN:
# Could check page.summary or other properties to assess importance
if len(title) < 30: # Simple heuristic for main topics
print(f"Physics article: {title}")

Navigate through Wikipedia's category hierarchy to explore related topics and subcategories.
def explore_category_hierarchy(category_name, max_depth=2, current_depth=0):
    """Recursively explore category hierarchy, printing an indented outline.

    Args:
        category_name: Full category title, e.g. 'Category:Computer_science'.
        max_depth: Maximum recursion depth before stopping.
        current_depth: Internal recursion counter; callers leave it at 0.
    """
    if current_depth >= max_depth:
        return

    wiki = wikipediaapi.Wikipedia('MyApp/1.0', 'en')
    category_page = wiki.page(category_name, ns=wikipediaapi.Namespace.CATEGORY)
    if not category_page.exists():
        return

    indent = " " * current_depth
    print(f"{indent}Category: {category_name}")

    members = category_page.categorymembers
    articles = []
    subcategories = []
    for title, member_page in members.items():
        if member_page.namespace == wikipediaapi.Namespace.MAIN:
            articles.append(title)
        elif member_page.namespace == wikipediaapi.Namespace.CATEGORY:
            subcategories.append(title)

    print(f"{indent} Articles: {len(articles)}")
    print(f"{indent} Subcategories: {len(subcategories)}")

    # Show some articles
    for article in articles[:3]:
        print(f"{indent} - {article}")

    # Recurse into subcategories
    for subcat in subcategories[:3]:  # Limit to prevent too much output
        explore_category_hierarchy(subcat, max_depth, current_depth + 1)


# Explore computer science hierarchy
explore_category_hierarchy('Category:Computer_science', max_depth=3)

Use categories to discover related content and analyze topical relationships.
def find_related_articles_via_categories(page_title, min_shared_categories=2):
    """Find articles related to *page_title* via shared categories.

    Args:
        page_title: Title of the source article.
        min_shared_categories: Minimum number of categories another article
            must share with the source page to count as related.

    Returns:
        List of (article_title, shared_count, shared_category_names) tuples
        sorted by shared_count descending; empty list if the page is missing.
    """
    wiki = wikipediaapi.Wikipedia('MyApp/1.0', 'en')
    page = wiki.page(page_title)
    if not page.exists():
        return []

    # Fetch the mapping once; the property may trigger an API request per access.
    page_categories = page.categories

    related_articles = {}
    # Check each category this page belongs to
    for category_name, category_page in page_categories.items():
        if category_page.exists():
            # Collect other main-namespace articles in this category
            for member_title, member_page in category_page.categorymembers.items():
                if (member_page.namespace == wikipediaapi.Namespace.MAIN
                        and member_title != page_title):
                    related_articles.setdefault(member_title, set()).add(category_name)

    # Keep articles that share at least the requested number of categories
    highly_related = [
        (article_title, len(shared_cats), shared_cats)
        for article_title, shared_cats in related_articles.items()
        if len(shared_cats) >= min_shared_categories
    ]
    # Sort by number of shared categories
    highly_related.sort(key=lambda item: item[1], reverse=True)
    return highly_related
def analyze_category_overlap(page1_title, page2_title):
    """Analyze category overlap between two pages.

    Returns:
        Dict with 'shared_categories', 'only_in_first', 'only_in_second'
        (sorted lists of category names) and 'similarity_ratio' (Jaccard
        index of the two category sets), or None if either page is missing.
    """
    wiki = wikipediaapi.Wikipedia('MyApp/1.0', 'en')
    page1 = wiki.page(page1_title)
    page2 = wiki.page(page2_title)
    if not (page1.exists() and page2.exists()):
        return None

    cats1 = set(page1.categories.keys())
    cats2 = set(page2.categories.keys())
    shared = cats1 & cats2
    union = cats1 | cats2  # compute once instead of twice in the ratio guard
    return {
        'shared_categories': sorted(shared),
        'only_in_first': sorted(cats1 - cats2),
        'only_in_second': sorted(cats2 - cats1),
        # Jaccard similarity; guard against both pages having no categories.
        'similarity_ratio': len(shared) / len(union) if union else 0.0,
    }
# Find articles related to "Neural network"
related = find_related_articles_via_categories('Neural_network', min_shared_categories=2)
print(f"Found {len(related)} highly related articles:")
for article, shared_count, categories in related[:10]:
print(f" {article} (shares {shared_count} categories)")
for cat in list(categories)[:3]: # Show first 3 shared categories
print(f" - {cat}")
# Compare two pages via categories
comparison = analyze_category_overlap('Machine_learning', 'Deep_learning')
if comparison:
print(f"Similarity ratio: {comparison['similarity_ratio']:.2f}")
print(f"Shared categories: {len(comparison['shared_categories'])}")
for cat in comparison['shared_categories'][:5]:
print(f" - {cat}")

Advanced category filtering and analysis for content organization.
def filter_categories_by_type(page_title):
    """Bucket a page's categories by rough type using keyword heuristics.

    Keyword order matters: maintenance keywords are checked first so that
    e.g. 'Wikipedia articles ...' is never misfiled as a subject category.

    Returns:
        Dict with keys 'subject', 'geographic', 'temporal', 'maintenance',
        'other', each mapping to a list of category names, or None if the
        page does not exist.
    """
    wiki = wikipediaapi.Wikipedia('MyApp/1.0', 'en')
    page = wiki.page(page_title)
    if not page.exists():
        return None

    categorized = {
        'subject': [],      # Subject matter categories
        'geographic': [],   # Geographic categories
        'temporal': [],     # Time-based categories
        'maintenance': [],  # Wikipedia maintenance categories
        'other': [],
    }
    for cat_name in page.categories:  # iterating the dict yields its keys
        cat_lower = cat_name.lower()
        if any(keyword in cat_lower for keyword in ('articles', 'pages', 'wikipedia', 'cleanup')):
            categorized['maintenance'].append(cat_name)
        elif any(keyword in cat_lower for keyword in ('country', 'city', 'region', 'american', 'european')):
            categorized['geographic'].append(cat_name)
        elif any(keyword in cat_lower for keyword in ('century', 'year', 'decade', 'era')):
            categorized['temporal'].append(cat_name)
        elif any(keyword in cat_lower for keyword in ('science', 'mathematics', 'computer', 'physics')):
            categorized['subject'].append(cat_name)
        else:
            categorized['other'].append(cat_name)
    return categorized
def get_category_statistics(category_name):
    """Collect member statistics for a category page.

    Args:
        category_name: Full category title, e.g.
            'Category:Artificial_intelligence'.

    Returns:
        Dict with 'total_members', 'articles', 'subcategories' and 'other'
        counts plus a 'member_types' breakdown keyed by 'Namespace_<id>',
        or None if the category page does not exist.
    """
    wiki = wikipediaapi.Wikipedia('MyApp/1.0', 'en')
    category_page = wiki.page(category_name, ns=wikipediaapi.Namespace.CATEGORY)
    if not category_page.exists():
        return None

    members = category_page.categorymembers
    stats = {
        'total_members': len(members),
        'articles': 0,
        'subcategories': 0,
        'other': 0,
        'member_types': {},
    }
    # Titles are not needed here, so iterate values() only.
    for member_page in members.values():
        ns = member_page.namespace
        if ns == wikipediaapi.Namespace.MAIN:
            stats['articles'] += 1
        elif ns == wikipediaapi.Namespace.CATEGORY:
            stats['subcategories'] += 1
        else:
            stats['other'] += 1
        # Per-namespace tally (covers MAIN and CATEGORY too)
        ns_name = f"Namespace_{ns}"
        stats['member_types'][ns_name] = stats['member_types'].get(ns_name, 0) + 1
    return stats
# Analyze page categories
category_analysis = filter_categories_by_type('Quantum_computing')
if category_analysis:
    for cat_type, cats in category_analysis.items():
        if cats:
            print(f"{cat_type.title()} categories ({len(cats)}):")
            for cat in cats[:3]:  # Show first 3
                print(f" - {cat}")

# Get category statistics
stats = get_category_statistics('Category:Artificial_intelligence')
if stats:
    print("Category statistics:")  # constant string; f-prefix was redundant
    print(f" Total members: {stats['total_members']}")
    print(f" Articles: {stats['articles']}")
    print(f" Subcategories: {stats['subcategories']}")
print(f" Other: {stats['other']}")

Install with Tessl CLI:
npx tessl i tessl/pypi-wikipedia-api