Python wrapper for Wikipedia's API that provides easy access to page content, sections, links, categories, and translations
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Extract and access Wikipedia page content including summaries, full text, sections, and hierarchical page structure. Content is loaded lazily when properties are first accessed, with support for both WIKI and HTML extraction formats.
Access various forms of page content from summary to full text with sections.
class WikipediaPage:
@property
def summary(self) -> str:
"""
Get the page summary (lead section without subsections).
Returns:
Summary text as string. Empty string if page doesn't exist.
"""
@property
def text(self) -> str:
"""
Get the complete page text including all sections.
Returns:
Full page text with section headers. Combines summary and all sections.
"""
@property
def sections(self) -> list[WikipediaPageSection]:
"""
Get all top-level sections of the page.
Returns:
List of WikipediaPageSection objects in document order.
"""

import wikipediaapi
wiki = wikipediaapi.Wikipedia('MyApp/1.0', 'en')
page = wiki.page('Artificial_intelligence')
# Get page summary
print("Summary:")
print(page.summary[:200] + "...")
# Get full page text
full_text = page.text
print(f"Full text length: {len(full_text)} characters")
# Access all sections
print("\nTop-level sections:")
for i, section in enumerate(page.sections):
    print(f"{i+1}. {section.title} (level {section.level})")

Check if pages exist and access basic metadata and URLs.
class WikipediaPage:
def exists(self) -> bool:
"""
Check if the page exists on Wikipedia.
Returns:
True if page exists, False otherwise.
"""
@property
def title(self) -> str:
"""
Get the page title.
Returns:
Page title as string.
"""
@property
def language(self) -> str:
"""
Get the page language.
Returns:
Language code (e.g., 'en', 'es', 'fr').
"""
@property
def variant(self) -> Optional[str]:
"""
Get the language variant if specified.
Returns:
Language variant code or None.
"""
@property
def namespace(self) -> int:
"""
Get the page namespace.
Returns:
Namespace integer (0 for main, 14 for categories, etc.).
"""
@property
def pageid(self) -> int:
"""
Get the unique page ID.
Returns:
Integer page ID, or -1 if page doesn't exist.
"""
@property
def fullurl(self) -> str:
"""
Get the full URL to the page.
Returns:
Complete URL to the Wikipedia page.
"""
@property
def canonicalurl(self) -> str:
"""
Get the canonical URL to the page.
Returns:
Canonical URL to the Wikipedia page.
"""
@property
def editurl(self) -> str:
"""
Get the edit URL for the page.
Returns:
URL for editing the Wikipedia page.
"""
@property
def displaytitle(self) -> str:
"""
Get the display title (may differ from title for formatting).
Returns:
Display title with formatting.
"""

# Check if page exists
page = wiki.page('Nonexistent_Page_123456')
if page.exists():
    print(f"Page '{page.title}' exists")
    print(f"Language: {page.language}")
    print(f"Namespace: {page.namespace}")
    print(f"Page ID: {page.pageid}")
    print(f"URL: {page.fullurl}")
else:
    print("Page does not exist")
    print(f"Page ID: {page.pageid}")  # Will be -1 for non-existent pages
# Page metadata
real_page = wiki.page('Python_(programming_language)')
print(f"Title: {real_page.title}")
print(f"Display Title: {real_page.displaytitle}")
print(f"Exists: {real_page.exists()}")
print(f"Language: {real_page.language}")
print(f"Page ID: {real_page.pageid}")
print(f"Full URL: {real_page.fullurl}")
print(f"Canonical URL: {real_page.canonicalurl}")
print(f"Edit URL: {real_page.editurl}")

Navigate and search through page sections with hierarchical structure support.
class WikipediaPage:
def section_by_title(self, title: str) -> Optional[WikipediaPageSection]:
"""
Get the last section with the specified title.
Parameters:
- title: Section title to search for
Returns:
WikipediaPageSection object or None if not found.
"""
def sections_by_title(self, title: str) -> list[WikipediaPageSection]:
"""
Get all sections with the specified title.
Parameters:
- title: Section title to search for
Returns:
List of WikipediaPageSection objects. Empty list if none found.
"""

page = wiki.page('Machine_learning')
# Find specific section
history_section = page.section_by_title('History')
if history_section:
    print(f"Found section: {history_section.title}")
    print(f"Section text: {history_section.text[:100]}...")
# Find all sections with same title (if duplicated)
overview_sections = page.sections_by_title('Overview')
print(f"Found {len(overview_sections)} sections titled 'Overview'")
# Navigate section hierarchy
for section in page.sections:
    print(f"Section: {section.title}")
    for subsection in section.sections:
        print(f" Subsection: {subsection.title}")

Access individual section content and hierarchical structure.
class WikipediaPageSection:
@property
def title(self) -> str:
"""
Get the section title.
Returns:
Section title as string.
"""
@property
def text(self) -> str:
"""
Get the section text content (without subsections).
Returns:
Section text as string.
"""
@property
def level(self) -> int:
"""
Get the section heading level.
Returns:
Integer level (0=top-level, 1=subsection, etc.).
"""
@property
def sections(self) -> list[WikipediaPageSection]:
"""
Get direct subsections of this section.
Returns:
List of WikipediaPageSection objects.
"""
def section_by_title(self, title: str) -> Optional[WikipediaPageSection]:
"""
Find subsection by title within this section.
Parameters:
- title: Subsection title to search for
Returns:
WikipediaPageSection object or None if not found.
"""
def full_text(self, level: int = 1) -> str:
"""
Get section text including all subsections with proper formatting.
Parameters:
- level: Starting heading level for formatting
Returns:
Complete section text with subsections and headers.
"""

page = wiki.page('Climate_change')
# Work with sections
for section in page.sections:
    print(f"\n=== {section.title} (Level {section.level}) ===")
    print(f"Text length: {len(section.text)} characters")
    # Show subsections
    if section.sections:
        print(f"Subsections ({len(section.sections)}):")
        for subsection in section.sections:
            print(f" - {subsection.title}")
    # Get full text with subsections
    if section.title == "Causes":
        full_content = section.full_text()
        print(f"Full section with subsections: {len(full_content)} characters")
# Find nested subsection
effects_section = page.section_by_title('Effects')
if effects_section:
    temperature_subsection = effects_section.section_by_title('Temperature')
    if temperature_subsection:
        print(f"Found nested subsection: {temperature_subsection.title}")
        print(f"Content: {temperature_subsection.text[:150]}...")

Wikipedia-API supports two extraction formats that affect how content is parsed and presented.
wiki = wikipediaapi.Wikipedia(
    'MyApp/1.0',
    'en',
    extract_format=wikipediaapi.ExtractFormat.WIKI
)

wiki = wikipediaapi.Wikipedia(
    'MyApp/1.0',
    'en',
    extract_format=wikipediaapi.ExtractFormat.HTML
)

<h1>, <h2>, etc.

# WIKI format
wiki_plain = wikipediaapi.Wikipedia('MyApp/1.0', 'en',
extract_format=wikipediaapi.ExtractFormat.WIKI)
page_plain = wiki_plain.page('Python_(programming_language)')
# HTML format
wiki_html = wikipediaapi.Wikipedia('MyApp/1.0', 'en',
extract_format=wikipediaapi.ExtractFormat.HTML)
page_html = wiki_html.page('Python_(programming_language)')
print("WIKI format summary:")
print(page_plain.summary[:100])
print("\nHTML format summary:")
print(page_html.summary[:100])

Install with Tessl CLI
npx tessl i tessl/pypi-wikipedia-api