Python wrapper for Wikipedia's API that provides easy access to page content, sections, links, categories, and translations
npx @tessl/cli install tessl/pypi-wikipedia-api@0.8.0

A comprehensive Python wrapper for Wikipedia's API that provides easy access to page content, sections, links, categories, and translations. This library enables developers to extract structured information from Wikipedia articles across all language editions, with support for various content formats, automatic redirect handling, and robust error management.
pip install wikipedia-api

import wikipediaapi

import wikipediaapi
# Initialize Wikipedia object with required user agent and language
wiki = wikipediaapi.Wikipedia(
    user_agent='MyProject/1.0 (contact@example.com)',
    language='en'
)

# Get a Wikipedia page
page = wiki.page('Python_(programming_language)')

# Check if page exists and get basic information
if page.exists():
    print(f"Title: {page.title}")
    print(f"Summary: {page.summary[:100]}...")
    print(f"URL: {page.fullurl}")

    # Access page content
    print(f"Full text length: {len(page.text)}")

    # Get page sections
    for section in page.sections:
        print(f"Section: {section.title} (level {section.level})")

    # Get related pages
    print(f"Categories: {len(page.categories)}")
    print(f"Links: {len(page.links)}")
    print(f"Language versions: {len(page.langlinks)}")

Wikipedia-API uses a lazy-loading design with three main components: the Wikipedia client, WikipediaPage, and WikipediaPageSection.
The library automatically handles API pagination, redirects, and provides both WIKI and HTML extraction formats. All content is fetched on-demand when properties are accessed, enabling efficient usage patterns.
Core functionality for initializing Wikipedia API connections, configuring extraction formats, language settings, and creating page objects. Provides the foundation for all Wikipedia data access.
class Wikipedia:
    """API client: holds connection settings and creates page objects.

    Signatures only; method bodies are elided in this reference.
    """

    def __init__(
        self,
        user_agent: str,
        language: str = "en",
        variant: Optional[str] = None,
        extract_format: ExtractFormat = ExtractFormat.WIKI,
        headers: Optional[dict[str, Any]] = None,
        extra_api_params: Optional[dict[str, Any]] = None,
        **request_kwargs
    ): ...

    def page(
        self,
        title: str,
        ns: WikiNamespace = Namespace.MAIN,
        unquote: bool = False
    ) -> WikipediaPage: ...

    def article(
        self,
        title: str,
        ns: WikiNamespace = Namespace.MAIN,
        unquote: bool = False
    ) -> WikipediaPage: ...  # Alias for page()

    # Per-page query methods: each takes the page to query.
    def extracts(self, page: WikipediaPage, **kwargs) -> str: ...

    def info(self, page: WikipediaPage) -> WikipediaPage: ...

    def langlinks(self, page: WikipediaPage, **kwargs) -> dict[str, WikipediaPage]: ...

    def links(self, page: WikipediaPage, **kwargs) -> dict[str, WikipediaPage]: ...

    def backlinks(self, page: WikipediaPage, **kwargs) -> dict[str, WikipediaPage]: ...

    def categories(self, page: WikipediaPage, **kwargs) -> dict[str, WikipediaPage]: ...
    def categorymembers(self, page: WikipediaPage, **kwargs) -> dict[str, WikipediaPage]: ...

Extract and access Wikipedia page content including summaries, full text, sections, and hierarchical page structure. Supports both WIKI and HTML formats with automatic section parsing.
class WikipediaPage:
    """Page metadata and content accessors (signatures only; bodies elided)."""

    # --- Identity and metadata ---
    @property
    def title(self) -> str: ...

    @property
    def language(self) -> str: ...

    @property
    def variant(self) -> Optional[str]: ...

    @property
    def namespace(self) -> int: ...

    @property
    def pageid(self) -> int: ...  # -1 if page doesn't exist

    @property
    def fullurl(self) -> str: ...

    @property
    def canonicalurl(self) -> str: ...

    @property
    def displaytitle(self) -> str: ...

    def exists(self) -> bool: ...

    # --- Content ---
    @property
    def summary(self) -> str: ...

    @property
    def text(self) -> str: ...

    @property
    def sections(self) -> list[WikipediaPageSection]: ...

    # Singular form returns one section or None; plural form returns a list.
    def section_by_title(self, title: str) -> Optional[WikipediaPageSection]: ...

    def sections_by_title(self, title: str) -> list[WikipediaPageSection]: ...
class WikipediaPageSection:
    """One section of a page: title, text, level, and nested subsections.

    Signatures only; bodies are elided in this reference.
    """

    @property
    def title(self) -> str: ...

    @property
    def text(self) -> str: ...

    @property
    def level(self) -> int: ...

    @property
    def sections(self) -> list[WikipediaPageSection]: ...

    def section_by_title(self, title: str) -> Optional[WikipediaPageSection]: ...
    def full_text(self, level: int = 1) -> str: ...

Access Wikipedia's link structure including internal page links, backlinks, and language translations. Enables navigation between related pages and discovery of page relationships.
class WikipediaPage:
    """Link-related properties of a page (signatures only; bodies elided).

    Each property returns a dict of string keys to WikipediaPage objects.
    """

    @property
    def links(self) -> dict[str, WikipediaPage]: ...

    @property
    def backlinks(self) -> dict[str, WikipediaPage]: ...
    @property
    def langlinks(self) -> dict[str, WikipediaPage]: ...

Work with Wikipedia's category system including page categories and category membership. Enables discovery of related content and hierarchical organization navigation.
class WikipediaPage:
    """Category-related properties of a page (signatures only; bodies elided).

    Each property returns a dict of string keys to WikipediaPage objects.
    """

    @property
    def categories(self) -> dict[str, WikipediaPage]: ...

    @property
    def categorymembers(self) -> dict[str, WikipediaPage]: ...


class ExtractFormat(IntEnum):
    """Format selector accepted by the Wikipedia constructor's extract_format."""

    WIKI = 1  # Wiki format (allows recognizing subsections)
    HTML = 2  # HTML format (allows retrieval of HTML tags)
class Namespace(IntEnum):
    """Integer IDs of wiki namespaces.

    Members are ints (IntEnum). In the members listed here, each
    ``*_TALK`` member is its subject namespace's ID plus one.
    """

    MAIN = 0
    TALK = 1
    USER = 2
    USER_TALK = 3
    WIKIPEDIA = 4
    WIKIPEDIA_TALK = 5
    FILE = 6
    FILE_TALK = 7
    MEDIAWIKI = 8
    MEDIAWIKI_TALK = 9
    TEMPLATE = 10
    TEMPLATE_TALK = 11
    HELP = 12
    HELP_TALK = 13
    CATEGORY = 14
    CATEGORY_TALK = 15
    PORTAL = 100
    PORTAL_TALK = 101
    PROJECT = 102
    PROJECT_TALK = 103
    REFERENCE = 104
    REFERENCE_TALK = 105
    BOOK = 108
    BOOK_TALK = 109
    DRAFT = 118
    DRAFT_TALK = 119
    EDUCATION_PROGRAM = 446
    EDUCATION_PROGRAM_TALK = 447
    TIMED_TEXT = 710
    TIMED_TEXT_TALK = 711
    MODULE = 828
    MODULE_TALK = 829
    GADGET = 2300
    GADGET_TALK = 2301
    GADGET_DEFINITION = 2302
    GADGET_DEFINITION_TALK = 2303
# Type aliases
# PagesDict: mapping of string keys to page objects (same shape as the
# dict[str, WikipediaPage] annotations used throughout this reference).
PagesDict = dict[str, WikipediaPage]
# WikiNamespace: a namespace given either as a Namespace member or a raw int.
WikiNamespace = Union[Namespace, int]
# Utility function
# NOTE(review): body is elided in this reference; judging by the name and
# signature it presumably normalizes a WikiNamespace to its plain int ID —
# confirm against the library source.
def namespace2int(namespace: WikiNamespace) -> int: ...