Wikipedia API for Python that simplifies access to Wikipedia data through the MediaWiki API.

Install with: npx @tessl/cli install tessl/pypi-wikipedia@1.4.0

A Python library that provides easy access to Wikipedia data through the MediaWiki API. Wikipedia simplifies search, content retrieval, and metadata extraction from Wikipedia pages without requiring direct API knowledge.

Install the package with: pip install wikipedia

Import it with: import wikipedia

All functionality is available through the main module:
# Names commonly imported when working with this library:
from wikipedia import search, page, summary, set_lang
from wikipedia import WikipediaPage, PageError, DisambiguationError
from datetime import timedelta  # needed for set_rate_limiting
from decimal import Decimal  # coordinates are returned as Decimal pairs

import wikipedia
from decimal import Decimal

# Full-text search for article titles.
results = wikipedia.search("Barack Obama")
print(results)  # ['Barack Obama', 'Barack Obama Sr.', ...]

# Plain-text summary of a page.
summary = wikipedia.summary("Barack Obama", sentences=2)
print(summary)

# Full page object with lazily loaded properties.
page = wikipedia.page("Barack Obama")
print(page.title)
print(page.url)
print(page.content[:200])  # first 200 characters
print(page.images[:3])  # first 3 image URLs
print(page.links[:5])  # first 5 linked pages

# Search by geographic coordinates.
nearby = wikipedia.geosearch(40.7128, -74.0060, results=5)  # NYC coordinates
print(nearby)  # articles near New York City

# Switch language editions, then search again.
wikipedia.set_lang("fr")
summary_fr = wikipedia.summary("Barack Obama", sentences=1)
print(summary_fr)

# Throttle requests when making many calls.
from datetime import timedelta
wikipedia.set_rate_limiting(True, min_wait=timedelta(milliseconds=100))

# Search Wikipedia for articles and get suggestions.
def search(query, results=10, suggestion=False):
    """Run a full-text search against Wikipedia.

    Parameters:
    - query (str): Text to search for.
    - results (int): Upper bound on the number of titles returned (default: 10).
    - suggestion (bool): When True, also return a suggested query (default: False).

    Returns:
    - list: Matching article titles when suggestion is False.
    - tuple: (titles_list, suggestion_string) when suggestion is True.
    """
def geosearch(latitude, longitude, title=None, results=10, radius=1000):
    """Find Wikipedia articles located near a coordinate pair.

    Parameters:
    - latitude (float): Latitude of the search centre.
    - longitude (float): Longitude of the search centre.
    - title (str, optional): Restrict the search to a specific article.
    - results (int): Maximum number of titles to return (default: 10).
    - radius (int): Search radius in meters, 10-10000 (default: 1000).

    Returns:
    - list: Titles of articles near the given coordinates.

    Example:
        # Find articles near the Eiffel Tower
        eiffel_articles = geosearch(48.8584, 2.2945, radius=500)
        # Find specific landmark near coordinates
        landmarks = geosearch(40.7589, -73.9851, title="Central Park", radius=1000)
    """
def suggest(query):
    """Return Wikipedia's "did you mean" suggestion for a query.

    Parameters:
    - query (str): Search term to get a suggestion for.

    Returns:
    - str or None: Suggested search term, or None when there is none.
    """
def random(pages=1):
    """Pick random Wikipedia article titles.

    Note: this name intentionally mirrors the library API and shadows the
    stdlib ``random`` module inside this namespace.

    Parameters:
    - pages (int): How many random titles to fetch, capped at 10 (default: 1).

    Returns:
    - str: A single title when pages == 1.
    - list: A list of titles when pages > 1.
    """

# Retrieve article content and create page objects.
def summary(title, sentences=0, chars=0, auto_suggest=True, redirect=True):
    """Fetch the plain-text summary of a Wikipedia page.

    Parameters:
    - title (str): Title of the page.
    - sentences (int): Keep only the first N sentences, max 10 (default: 0 for intro).
    - chars (int): Keep only the first N characters (default: 0 for intro).
    - auto_suggest (bool): Let Wikipedia auto-correct the title (default: True).
    - redirect (bool): Follow redirects automatically (default: True).

    Returns:
    - str: The summary as plain text.
    """
def page(title=None, pageid=None, auto_suggest=True, redirect=True, preload=False):
    """Build a WikipediaPage object for a single page.

    Parameters:
    - title (str, optional): Page title.
    - pageid (int, optional): Numeric page ID; mutually exclusive with title.
    - auto_suggest (bool): Let Wikipedia auto-correct the title (default: True).
    - redirect (bool): Follow redirects automatically (default: True).
    - preload (bool): Fetch every property during initialization (default: False).

    Returns:
    - WikipediaPage: Page object whose properties load lazily.
    """

# Configure library behavior for language, rate limiting, and user agent.
def set_lang(prefix):
    """Switch to another Wikipedia language edition.

    Parameters:
    - prefix (str): Two-letter language code such as 'en', 'fr' or 'es'.

    Note: the search, suggest and summary caches are cleared as a side effect.
    """
def set_user_agent(user_agent_string):
    """Override the User-Agent header sent with every API request.

    Parameters:
    - user_agent_string (str): The User-Agent value to use.
    """
def set_rate_limiting(rate_limit, min_wait=timedelta(milliseconds=50)):
    """Turn request throttling on or off.

    Parameters:
    - rate_limit (bool): True to enable throttling, False to disable it.
    - min_wait (timedelta, optional): Minimum delay between two requests
      (default: timedelta(milliseconds=50); timedelta is immutable, so the
      shared default is safe).
    """

# Additional utility functions for language support and donations.
def languages():
    """List every supported Wikipedia language edition.

    Returns:
    - dict: Mapping of language prefix to its local-language name.
    """
def donate():
    """Launch the Wikimedia donation page in the system's default browser."""

# Represents a Wikipedia page with lazy-loaded properties for content and metadata.
class WikipediaPage:
def __init__(self, title=None, pageid=None, redirect=True, preload=False, original_title=''):
"""
Initialize WikipediaPage object.
Parameters:
- title (str, optional): Page title
- pageid (int, optional): Numeric page ID
- redirect (bool): Allow redirects (default: True)
- preload (bool): Load all properties immediately (default: False)
- original_title (str): Original search title
"""
# Properties (lazy-loaded)
title: str # Page title
url: str # Full Wikipedia URL
pageid: str # Numeric page ID (stored as string)
content: str # Full plain text content
summary: str # Plain text summary (intro section)
images: list[str] # List of image URLs
coordinates: tuple[Decimal, Decimal] | None # (latitude, longitude) or None
references: list[str] # External link URLs
links: list[str] # Wikipedia page titles linked from this page
categories: list[str] # Wikipedia categories for this page
sections: list[str] # Section titles from table of contents
revision_id: int # Current revision ID
parent_id: int # Parent revision ID
def html(self):
"""
Get full page HTML content.
Returns:
- str: Complete HTML content
Warning: Can be slow for long pages
"""
def section(self, section_title):
"""
Get plain text content of a specific section.
Parameters:
- section_title (str): Section title from self.sections
Returns:
- str or None: Section content or None if not found
Warning: Only returns content between section and next subsection
"""Custom exceptions for error handling.
class WikipediaException(Exception):
    """Root of the library's exception hierarchy."""

    def __init__(self, error):
        # Keep the underlying error payload for callers to inspect.
        # NOTE: mirrors the original, which does not call super().__init__().
        self.error = error
class PageError(WikipediaException):
    """Raised when no Wikipedia page matches a query."""

    def __init__(self, pageid=None, *args):
        # Stub: the real implementation records self.pageid or self.title
        # depending on which arguments were supplied.
        pass
class DisambiguationError(WikipediaException):
    """Raised when a title resolves to a disambiguation page."""

    def __init__(self, title, may_refer_to):
        self.title = title
        # Candidate page titles the ambiguous query may refer to.
        self.options = may_refer_to
class RedirectError(WikipediaException):
    """Raised when a page is a redirect but redirect=False was requested."""

    def __init__(self, title):
        self.title = title
class HTTPTimeoutError(WikipediaException):
    """Raised when a MediaWiki API request times out."""

    def __init__(self, query):
        # Remember which query timed out so callers can retry it.
        self.query = query

import wikipedia
# A missing page raises PageError.
try:
    page = wikipedia.page("Nonexistent Page", auto_suggest=False)
except wikipedia.PageError as e:
    print(f"Page not found: {e}")

# An ambiguous title raises DisambiguationError with candidate titles.
try:
    page = wikipedia.page("Python")  # might be ambiguous
except wikipedia.DisambiguationError as e:
    print(f"Multiple pages found for '{e.title}':")
    for option in e.options[:5]:  # show the first 5 candidates
        print(f" - {option}")
    # Pick one candidate explicitly.
    page = wikipedia.page(e.options[0])

# With redirect=False, a redirect page raises RedirectError.
try:
    page = wikipedia.page("Redirect Page", redirect=False)
except wikipedia.RedirectError as e:
    print(f"Page '{e.title}' redirects. Set redirect=True to follow.")
# Retry wrapper: exponential backoff around transient API timeouts.
import time

def robust_search(query, max_retries=3):
    """Search with retries; prints progress and returns [] after repeated timeouts."""
    for attempt in range(max_retries):
        try:
            return wikipedia.search(query)
        except wikipedia.HTTPTimeoutError as e:
            if attempt < max_retries - 1:
                print(f"Timeout on attempt {attempt + 1}, retrying...")
                time.sleep(2 ** attempt)  # exponential backoff
            else:
                print(f"Failed after {max_retries} attempts: {e}")
                return []
# Catch-all for any library error, plus empty search results.
try:
    results = wikipedia.search("test query")
    page = wikipedia.page(results[0])
except wikipedia.WikipediaException as e:
    print(f"Wikipedia error: {e}")
except IndexError:
    print("No search results found")