Python wrapper for Wikipedia's API that provides easy access to page content, sections, links, categories, and translations
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Core functionality for initializing Wikipedia API connections, configuring extraction formats, language settings, and creating page objects. The Wikipedia class serves as the main entry point for all Wikipedia data access.
Create and configure a Wikipedia API wrapper instance with user agent, language, format settings, and connection parameters.
class Wikipedia:
def __init__(
self,
user_agent: str,
language: str = "en",
variant: Optional[str] = None,
extract_format: ExtractFormat = ExtractFormat.WIKI,
headers: Optional[dict[str, Any]] = None,
extra_api_params: Optional[dict[str, Any]] = None,
**request_kwargs
):
"""
Initialize Wikipedia API wrapper.
Parameters:
- user_agent: HTTP User-Agent identifier (required, min 5 chars)
- language: Wikipedia language edition (e.g., 'en', 'es', 'fr')
- variant: Language variant for languages that support conversion
- extract_format: Content extraction format (WIKI or HTML)
- headers: Additional HTTP headers for requests
- extra_api_params: Additional API parameters for all requests
- request_kwargs: Additional parameters for requests library (timeout, proxies, etc.)
Raises:
AssertionError: If user_agent is too short or language is invalid
"""

import wikipediaapi
# Basic initialization
wiki = wikipediaapi.Wikipedia(
user_agent='MyApp/1.0 (contact@example.com)',
language='en'
)
# With custom settings
wiki = wikipediaapi.Wikipedia(
user_agent='MyApp/1.0 (contact@example.com)',
language='zh',
variant='zh-cn', # Simplified Chinese variant
extract_format=wikipediaapi.ExtractFormat.HTML,
headers={'Accept-Language': 'zh-CN,zh;q=0.9'},
timeout=15.0, # Custom timeout
proxies={'http': 'http://proxy:8080'} # Proxy support
)
# Multiple language instances
wiki_en = wikipediaapi.Wikipedia('MyApp/1.0', 'en')
wiki_es = wikipediaapi.Wikipedia('MyApp/1.0', 'es')
wiki_fr = wikipediaapi.Wikipedia('MyApp/1.0', 'fr')

Create WikipediaPage objects for accessing Wikipedia content. Pages are created with lazy loading - content is fetched only when accessed.
def page(
self,
title: str,
ns: WikiNamespace = Namespace.MAIN,
unquote: bool = False
) -> WikipediaPage:
"""
Create a WikipediaPage object for the specified title.
Parameters:
- title: Page title as used in Wikipedia URL
- ns: Wikipedia namespace (default: MAIN)
- unquote: Whether to URL-unquote the title
Returns:
WikipediaPage object (content loaded lazily)
"""
def article(
self,
title: str,
ns: WikiNamespace = Namespace.MAIN,
unquote: bool = False
) -> WikipediaPage:
"""
Alias for page() method.
Parameters:
- title: Page title as used in Wikipedia URL
- ns: Wikipedia namespace (default: MAIN)
- unquote: Whether to URL-unquote the title
Returns:
WikipediaPage object (content loaded lazily)
"""

# Basic page creation
page = wiki.page('Python_(programming_language)')
# Page in different namespace
category_page = wiki.page('Physics', ns=wikipediaapi.Namespace.CATEGORY)
# URL-encoded title (Hindi Wikipedia example)
hindi_page = wiki.page('%E0%A4%AA%E0%A4%BE%E0%A4%87%E0%A4%A5%E0%A4%A8', unquote=True)
# Using article() alias
page = wiki.article('Machine_learning')

Low-level methods for direct Wikipedia API access. These methods are used internally by WikipediaPage properties but can be called directly for custom use cases.
def extracts(self, page: WikipediaPage, **kwargs) -> str:
"""
Get page content extracts with custom parameters.
Parameters:
- page: WikipediaPage object
- kwargs: Additional API parameters (exsentences, exchars, etc.)
Returns:
Extracted page content as string
"""
def info(self, page: WikipediaPage) -> WikipediaPage:
"""
Get page metadata and information.
Parameters:
- page: WikipediaPage object
Returns:
Updated WikipediaPage with metadata populated
"""
def langlinks(self, page: WikipediaPage, **kwargs) -> dict[str, WikipediaPage]:
"""
Get language links for the page.
Parameters:
- page: WikipediaPage object
- kwargs: Additional API parameters
Returns:
Dictionary mapping language codes to WikipediaPage objects
"""
def links(self, page: WikipediaPage, **kwargs) -> dict[str, WikipediaPage]:
"""
Get internal links from the page.
Parameters:
- page: WikipediaPage object
- kwargs: Additional API parameters
Returns:
Dictionary mapping page titles to WikipediaPage objects
"""
def backlinks(self, page: WikipediaPage, **kwargs) -> dict[str, WikipediaPage]:
"""
Get pages that link to this page.
Parameters:
- page: WikipediaPage object
- kwargs: Additional API parameters
Returns:
Dictionary mapping page titles to WikipediaPage objects
"""
def categories(self, page: WikipediaPage, **kwargs) -> dict[str, WikipediaPage]:
"""
Get categories for the page.
Parameters:
- page: WikipediaPage object
- kwargs: Additional API parameters
Returns:
Dictionary mapping category names to WikipediaPage objects
"""
def categorymembers(self, page: WikipediaPage, **kwargs) -> dict[str, WikipediaPage]:
"""
Get pages in the category (for category pages).
Parameters:
- page: WikipediaPage object representing a category
- kwargs: Additional API parameters
Returns:
Dictionary mapping page titles to WikipediaPage objects
"""

Access Wikipedia instance configuration after initialization.
@property
def language(self) -> str:
"""Get the configured language."""
@property
def variant(self) -> Optional[str]:
"""Get the configured language variant."""
@property
def extract_format(self) -> ExtractFormat:
"""Get the configured extraction format."""

The Wikipedia class automatically manages HTTP sessions and cleanup.
def __del__(self) -> None:
"""Automatically closes the HTTP session when the Wikipedia object is destroyed."""

# Session is automatically managed
wiki = wikipediaapi.Wikipedia('MyApp/1.0', 'en')
# ... use wiki object
# Session automatically closed when wiki goes out of scope
# For long-running applications, create the instance inside a function so its session is cleaned up once the instance goes out of scope
def process_pages(page_titles):
wiki = wikipediaapi.Wikipedia('MyApp/1.0', 'en')
try:
for title in page_titles:
page = wiki.page(title)
# Process page...
finally:
# Session automatically cleaned up
pass

The Wikipedia class validates parameters and raises AssertionError for invalid configurations:
# These will raise AssertionError
try:
wiki = wikipediaapi.Wikipedia("", "en") # Empty user agent
except AssertionError as e:
print(f"Error: {e}")
try:
wiki = wikipediaapi.Wikipedia("MyApp", "") # Empty language
except AssertionError as e:
print(f"Error: {e}")

Install with Tessl CLI
npx tessl i tessl/pypi-wikipedia-api