Wikipedia API for Python that simplifies access to Wikipedia data through the MediaWiki API.

Install with: npx @tessl/cli install tessl/pypi-wikipedia@1.4.0

A Python library that provides easy access to Wikipedia data through the MediaWiki API. Wikipedia simplifies search, content retrieval, and metadata extraction from Wikipedia pages without requiring direct API knowledge.

Install the package with: pip install wikipedia

Import it with: import wikipedia

All functionality is available through the main module:
# Names commonly imported when working with this library:
from wikipedia import search, page, summary, set_lang
from wikipedia import WikipediaPage, PageError, DisambiguationError
from datetime import timedelta  # needed for set_rate_limiting
from decimal import Decimal  # coordinates are returned as Decimal pairs

import wikipedia
from decimal import Decimal

# Full-text search for article titles.
results = wikipedia.search("Barack Obama")
print(results)  # ['Barack Obama', 'Barack Obama Sr.', ...]

# Plain-text summary of a page.
summary = wikipedia.summary("Barack Obama", sentences=2)
print(summary)

# Full page object with lazily loaded properties.
page = wikipedia.page("Barack Obama")
print(page.title)
print(page.url)
print(page.content[:200])  # first 200 characters
print(page.images[:3])  # first 3 image URLs
print(page.links[:5])  # first 5 linked pages

# Search by geographic coordinates.
nearby = wikipedia.geosearch(40.7128, -74.0060, results=5)  # NYC coordinates
print(nearby)  # articles near New York City

# Switch language editions, then search again.
wikipedia.set_lang("fr")
summary_fr = wikipedia.summary("Barack Obama", sentences=1)
print(summary_fr)

# Throttle requests when making many calls.
from datetime import timedelta
wikipedia.set_rate_limiting(True, min_wait=timedelta(milliseconds=100))

# Search Wikipedia for articles and get suggestions.
def search(query, results=10, suggestion=False):
    """Run a full-text search against Wikipedia.

    Parameters:
    - query (str): Text to search for.
    - results (int): Upper bound on the number of titles returned (default: 10).
    - suggestion (bool): When True, also return a suggested query (default: False).

    Returns:
    - list: Matching article titles when suggestion is False.
    - tuple: (titles_list, suggestion_string) when suggestion is True.
    """
def geosearch(latitude, longitude, title=None, results=10, radius=1000):
    """Find Wikipedia articles located near a coordinate pair.

    Parameters:
    - latitude (float): Latitude of the search centre.
    - longitude (float): Longitude of the search centre.
    - title (str, optional): Restrict the search to a specific article.
    - results (int): Maximum number of titles to return (default: 10).
    - radius (int): Search radius in meters, 10-10000 (default: 1000).

    Returns:
    - list: Titles of articles near the given coordinates.

    Example:
        # Find articles near the Eiffel Tower
        eiffel_articles = geosearch(48.8584, 2.2945, radius=500)
        # Find specific landmark near coordinates
        landmarks = geosearch(40.7589, -73.9851, title="Central Park", radius=1000)
    """
def suggest(query):
    """Return Wikipedia's "did you mean" suggestion for a query.

    Parameters:
    - query (str): Search term to get a suggestion for.

    Returns:
    - str or None: Suggested search term, or None when there is none.
    """
def random(pages=1):
    """Pick random Wikipedia article titles.

    Note: this name intentionally mirrors the library API and shadows the
    stdlib ``random`` module inside this namespace.

    Parameters:
    - pages (int): How many random titles to fetch, capped at 10 (default: 1).

    Returns:
    - str: A single title when pages == 1.
    - list: A list of titles when pages > 1.
    """

# Retrieve article content and create page objects.
def summary(title, sentences=0, chars=0, auto_suggest=True, redirect=True):
    """Fetch the plain-text summary of a Wikipedia page.

    Parameters:
    - title (str): Title of the page.
    - sentences (int): Keep only the first N sentences, max 10 (default: 0 for intro).
    - chars (int): Keep only the first N characters (default: 0 for intro).
    - auto_suggest (bool): Let Wikipedia auto-correct the title (default: True).
    - redirect (bool): Follow redirects automatically (default: True).

    Returns:
    - str: The summary as plain text.
    """
def page(title=None, pageid=None, auto_suggest=True, redirect=True, preload=False):
    """Build a WikipediaPage object for a single page.

    Parameters:
    - title (str, optional): Page title.
    - pageid (int, optional): Numeric page ID; mutually exclusive with title.
    - auto_suggest (bool): Let Wikipedia auto-correct the title (default: True).
    - redirect (bool): Follow redirects automatically (default: True).
    - preload (bool): Fetch every property during initialization (default: False).

    Returns:
    - WikipediaPage: Page object whose properties load lazily.
    """

# Configure library behavior for language, rate limiting, and user agent.
def set_lang(prefix):
    """Switch to another Wikipedia language edition.

    Parameters:
    - prefix (str): Two-letter language code such as 'en', 'fr' or 'es'.

    Note: the search, suggest and summary caches are cleared as a side effect.
    """
def set_user_agent(user_agent_string):
    """Override the User-Agent header sent with every API request.

    Parameters:
    - user_agent_string (str): The User-Agent value to use.
    """
def set_rate_limiting(rate_limit, min_wait=timedelta(milliseconds=50)):
    """Turn request throttling on or off.

    Parameters:
    - rate_limit (bool): True to enable throttling, False to disable it.
    - min_wait (timedelta, optional): Minimum delay between two requests
      (default: timedelta(milliseconds=50); timedelta is immutable, so the
      shared default is safe).
    """

# Additional utility functions for language support and donations.
def languages():
    """List every supported Wikipedia language edition.

    Returns:
    - dict: Mapping of language prefix to its local-language name.
    """
def donate():
    """Launch the Wikimedia donation page in the system's default browser."""

# Represents a Wikipedia page with lazy-loaded properties for content and metadata.
class WikipediaPage:
def __init__(self, title=None, pageid=None, redirect=True, preload=False, original_title=''):
"""
Initialize WikipediaPage object.
Parameters:
- title (str, optional): Page title
- pageid (int, optional): Numeric page ID
- redirect (bool): Allow redirects (default: True)
- preload (bool): Load all properties immediately (default: False)
- original_title (str): Original search title
"""
# Properties (lazy-loaded)
title: str # Page title
url: str # Full Wikipedia URL
pageid: str # Numeric page ID (stored as string)
content: str # Full plain text content
summary: str # Plain text summary (intro section)
images: list[str] # List of image URLs
coordinates: tuple[Decimal, Decimal] | None # (latitude, longitude) or None
references: list[str] # External link URLs
links: list[str] # Wikipedia page titles linked from this page
categories: list[str] # Wikipedia categories for this page
sections: list[str] # Section titles from table of contents
revision_id: int # Current revision ID
parent_id: int # Parent revision ID
def html(self):
"""
Get full page HTML content.
Returns:
- str: Complete HTML content
Warning: Can be slow for long pages
"""
def section(self, section_title):
"""
Get plain text content of a specific section.
Parameters:
- section_title (str): Section title from self.sections
Returns:
- str or None: Section content or None if not found
Warning: Only returns content between section and next subsection
"""Custom exceptions for error handling.
class WikipediaException(Exception):
    """Root of the library's exception hierarchy."""

    def __init__(self, error):
        # Keep the underlying error payload for callers to inspect.
        # NOTE: mirrors the original, which does not call super().__init__().
        self.error = error
class PageError(WikipediaException):
    """Raised when no Wikipedia page matches a query."""

    def __init__(self, pageid=None, *args):
        # Stub: the real implementation records self.pageid or self.title
        # depending on which arguments were supplied.
        pass
class DisambiguationError(WikipediaException):
    """Raised when a title resolves to a disambiguation page."""

    def __init__(self, title, may_refer_to):
        self.title = title
        # Candidate page titles the ambiguous query may refer to.
        self.options = may_refer_to
class RedirectError(WikipediaException):
    """Raised when a page is a redirect but redirect=False was requested."""

    def __init__(self, title):
        self.title = title
class HTTPTimeoutError(WikipediaException):
    """Raised when a MediaWiki API request times out."""

    def __init__(self, query):
        # Remember which query timed out so callers can retry it.
        self.query = query

import wikipedia
# A missing page raises PageError.
try:
    page = wikipedia.page("Nonexistent Page", auto_suggest=False)
except wikipedia.PageError as e:
    print(f"Page not found: {e}")

# An ambiguous title raises DisambiguationError with candidate titles.
try:
    page = wikipedia.page("Python")  # might be ambiguous
except wikipedia.DisambiguationError as e:
    print(f"Multiple pages found for '{e.title}':")
    for option in e.options[:5]:  # show the first 5 candidates
        print(f" - {option}")
    # Pick one candidate explicitly.
    page = wikipedia.page(e.options[0])

# With redirect=False, a redirect page raises RedirectError.
try:
    page = wikipedia.page("Redirect Page", redirect=False)
except wikipedia.RedirectError as e:
    print(f"Page '{e.title}' redirects. Set redirect=True to follow.")
# Retry wrapper: exponential backoff around transient API timeouts.
import time

def robust_search(query, max_retries=3):
    """Search with retries; prints progress and returns [] after repeated timeouts."""
    for attempt in range(max_retries):
        try:
            return wikipedia.search(query)
        except wikipedia.HTTPTimeoutError as e:
            if attempt < max_retries - 1:
                print(f"Timeout on attempt {attempt + 1}, retrying...")
                time.sleep(2 ** attempt)  # exponential backoff
            else:
                print(f"Failed after {max_retries} attempts: {e}")
                return []
# Catch-all for any library error, plus empty search results.
try:
    results = wikipedia.search("test query")
    page = wikipedia.page(results[0])
except wikipedia.WikipediaException as e:
    print(f"Wikipedia error: {e}")
except IndexError:
    print("No search results found")