tessl/pypi-spacy

Industrial-strength Natural Language Processing (NLP) in Python

—

Pending

Overview

Eval results

Files

Visualization

Name: tessl/pypi-spacy
Author: tessl

Interactive visualization tools for displaying linguistic analysis including dependency trees, named entities, and custom visualizations. spaCy's displaCy visualizer renders analysis results in HTML/SVG format for web browsers and Jupyter notebooks.

Capabilities

Rendering Functions

Core functions for generating HTML/SVG visualizations of linguistic analysis.

def render(docs: Union[Doc, Span, dict, List[Union[Doc, Span, dict]]], style: str = "dep",
           page: bool = False, minify: bool = False,
           jupyter: Optional[bool] = None, options: Optional[dict] = None,
           manual: bool = False) -> str:
    """
    Render a visualization of linguistic analysis.

    Args:
        docs: Doc(s) or Span(s) to visualize. When ``manual`` is True, pass
            the prepared visualization dict(s) here instead.
        style: Visualization style ('dep' or 'ent')
        page: Render as full HTML page
        minify: Minify HTML output
        jupyter: Override Jupyter notebook detection. Leave as None to keep
            detection automatic; pass False to always get the markup back
            as a string.
        options: Visualization options and settings
        manual: Don't parse Doc, use manual data

    Returns:
        HTML/SVG string of the visualization
    """

def serve(docs: Union[Doc, List[Doc]], style: str = "dep",
          port: int = 5000, host: str = "0.0.0.0",
          options: Optional[dict] = None, manual: bool = False) -> None:
    """
    Serve visualizations on a web server.

    Args:
        docs: Doc(s) to visualize
        style: Visualization style ('dep' or 'ent')
        port: Port number for web server
        host: Host address to bind to.
            NOTE(review): the default "0.0.0.0" typically exposes the server
            on every network interface; pass "127.0.0.1" for local-only use.
        options: Visualization options and settings
        manual: Don't parse Doc, use manual data
    """

Data Parsing Functions

Functions to extract visualization data from spaCy objects.

def parse_deps(doc: Doc, options: Optional[dict] = None) -> dict:
    """
    Parse a Doc object for dependency visualization.

    Args:
        doc: The Doc object to parse
        options: Visualization options; may be omitted

    Returns:
        Dictionary with dependency visualization data (one dict describing
        the given Doc)
    """

def parse_ents(doc: Doc, options: Optional[dict] = None) -> dict:
    """
    Parse a Doc object for entity visualization.

    Args:
        doc: The Doc object to parse
        options: Visualization options; may be omitted

    Returns:
        Dictionary with entity visualization data. This is a single dict for
        the given Doc (not a list), and it can be handed to
        ``render(..., style="ent", manual=True)``.
    """

Renderer Classes

Classes that handle the actual rendering of visualizations.

class DependencyRenderer:
    """Renderer for dependency parse visualizations."""

    # Same identifier as the ``style="dep"`` argument of render() and serve().
    style: str = "dep"

    def __init__(self, options: Optional[dict] = None) -> None:
        """
        Initialize the dependency renderer.

        Args:
            options: Visualization options and settings; may be omitted
        """

    def render(self, parsed: List[dict], page: bool = False,
               minify: bool = False) -> str:
        """
        Render dependency visualization.

        Args:
            parsed: Parsed dependency data, given as a list of dicts
                (presumably one per document, shaped like the output of
                parse_deps; confirm against the library)
            page: Render as full HTML page
            minify: Minify HTML output

        Returns:
            HTML/SVG string
        """

class EntityRenderer:
    """Renderer for named entity visualizations."""

    # Same identifier as the ``style="ent"`` argument of render() and serve().
    style: str = "ent"

    def __init__(self, options: Optional[dict] = None) -> None:
        """
        Initialize the entity renderer.

        Args:
            options: Visualization options and settings; may be omitted
        """

    def render(self, parsed: List[dict], page: bool = False,
               minify: bool = False) -> str:
        """
        Render entity visualization.

        Args:
            parsed: Parsed entity data, given as a list of dicts
                (presumably one per document, shaped like the output of
                parse_ents; confirm against the library)
            page: Render as full HTML page
            minify: Minify HTML output

        Returns:
            HTML/SVG string
        """

Visualization Options

Dependency Visualization Options

Configuration options for dependency tree visualizations:

# Default-style settings for the dependency ("dep") visualizer. Each key is
# listed exactly once: a repeated key in a dict literal silently overrides the
# earlier entry, which hides mistakes when the two values ever diverge.
dep_options = {
    # Appearance
    "compact": False,           # Use compact arrow style
    "bg": "#ffffff",            # Background color
    "color": "#000000",         # Text color
    "font": "Arial",            # Font family
    "distance": 175,            # Distance between tokens
    "arrow_stroke": 2,          # Arrow line width
    "arrow_width": 10,          # Arrow head width
    "arrow_spacing": 20,        # Spacing between arrows
    "word_spacing": 45,         # Spacing between words

    # Parse content and token labels
    "collapse_punct": True,     # Collapse punctuation arcs
    "collapse_phrases": False,  # Collapse noun phrases
    "fine_grained": False,      # Use fine-grained POS tags
    "add_lemma": False,         # Show lemmas below tokens
}

Entity Visualization Options

Configuration options for named entity visualizations:

# Settings for the named-entity ("ent") visualizer.
ent_options = {
    # Which entity labels are highlighted, and the color assigned to each
    "ents": ["PERSON", "ORG", "GPE", "MONEY"],
    "colors": {
        "PERSON": "#aa9cfc", "ORG": "#7aecec",
        "GPE": "#bfefff", "MONEY": "#e4e7d2",
    },

    # Optional overrides, left unset here
    "template": None,         # Custom HTML template
    "kb_url_template": None,  # Knowledge base URL template
}

Usage Examples

Basic Dependency Visualization

import spacy
from spacy import displacy

nlp = spacy.load("en_core_web_sm")
doc = nlp("Apple is looking at buying U.K. startup for $1 billion")

# Render dependency visualization as a markup string
html = displacy.render(doc, style="dep", jupyter=False)
print(html)

# Display in Jupyter notebook
displacy.render(doc, style="dep", jupyter=True)

# Save to file.
# jupyter=False makes render() hand back the markup as a string even when this
# snippet is executed inside a notebook, and the explicit UTF-8 encoding keeps
# non-ASCII characters from depending on the platform's default encoding.
with open("dependency.html", "w", encoding="utf-8") as f:
    f.write(displacy.render(doc, style="dep", page=True, jupyter=False))

Named Entity Visualization

import spacy
from spacy import displacy

nlp = spacy.load("en_core_web_sm")
doc = nlp("Apple Inc. is looking at buying U.K. startup for $1 billion")

# Entity markup with the default settings
html = displacy.render(doc, style="ent", jupyter=False)
print(html)

# Same document again, limited to three labels with a custom color for each;
# the label list is taken from the color mapping so the two stay in sync
colors = {
    "ORG": "#85C1E9",
    "GPE": "#F8C471",
    "MONEY": "#82E0AA",
}
options = {"ents": list(colors), "colors": colors}

html = displacy.render(doc, style="ent", options=options)
print(html)

Visualizing Multiple Documents

import spacy
from spacy import displacy

nlp = spacy.load("en_core_web_sm")

texts = [
    "Apple Inc. is an American technology company.",
    "Google was founded by Larry Page and Sergey Brin.",
    "Microsoft Corporation is based in Redmond, Washington."
]

docs = [nlp(text) for text in texts]

# Visualize multiple documents in one page.
# jupyter=False ensures a string comes back even inside a notebook, so the
# value is always safe to write to disk below.
html = displacy.render(docs, style="ent", page=True, jupyter=False)

# Save multi-document visualization; the explicit UTF-8 encoding avoids
# relying on the platform's default when the markup has non-ASCII characters.
with open("multiple_docs.html", "w", encoding="utf-8") as f:
    f.write(html)

Web Server Visualization

import spacy
from spacy import displacy

nlp = spacy.load("en_core_web_sm")
docs = [nlp("Apple Inc. is buying a startup for $1 billion.")]

# Start visualization server on port 5000, then open
# http://localhost:5000 in a browser to view the result
displacy.serve(docs, style="dep", port=5000)

# Serve on specific host and port
# NOTE(review): serve() is expected to block while its server is running, so
# this second call would only start once the first server is stopped — confirm.
displacy.serve(docs, style="ent", host="127.0.0.1", port=8000)

Custom Visualization Options

import spacy
from spacy import displacy

nlp = spacy.load("en_core_web_sm")

# Both documents are parsed up front; each is rendered with its own options
doc = nlp("The quick brown fox jumps over the lazy dog")
doc2 = nlp("Apple Inc. CEO Tim Cook visited London, England")

# Dependency view: compact arrows, custom palette and font, plus
# fine-grained tags and lemmas on each token
dep_options = {
    "compact": True,
    "bg": "#f9f9f9",
    "color": "#333333",
    "font": "Helvetica",
    "distance": 200,
    "arrow_stroke": 3,
    "arrow_width": 12,
    "fine_grained": True,
    "add_lemma": True,
}
html = displacy.render(doc, style="dep", options=dep_options)

# Entity view: three labels, each with its own highlight color
ent_options = {
    "ents": ["PERSON", "ORG", "GPE"],
    "colors": {"PERSON": "#ff6b6b", "ORG": "#4ecdc4", "GPE": "#45b7d1"},
}
html = displacy.render(doc2, style="ent", options=ent_options)

Manual Data Visualization

from spacy import displacy

# Hand-written dependency data: one dict per sentence, holding the token
# labels ("words") and the arrows between token positions ("arcs")
manual_dep_data = [
    {
        "words": [
            {"text": text, "tag": tag}
            for text, tag in (("Apple", "NNP"), ("is", "VBZ"), ("great", "JJ"))
        ],
        "arcs": [
            {"start": start, "end": end, "label": label, "dir": direction}
            for start, end, label, direction in (
                (0, 1, "nsubj", "left"),
                (1, 2, "acomp", "right"),
            )
        ],
    }
]

# manual=True: the prepared dicts are rendered as given instead of parsing Doc objects
html = displacy.render(manual_dep_data, style="dep", manual=True)

# Manual entity data.
# "start"/"end" are character offsets into "text" (end exclusive), so
# text[0:10] is "Apple Inc." and text[38:47] is "Cupertino".
manual_ent_data = [
    {
        "text": "Apple Inc. is a technology company in Cupertino.",
        "ents": [
            {"start": 0, "end": 10, "label": "ORG"},
            {"start": 38, "end": 47, "label": "GPE"}
        ]
    }
]

# manual=True: the prepared dicts are rendered as given instead of parsing Doc objects
html = displacy.render(manual_ent_data, style="ent", manual=True)

Advanced Customization

import spacy
from spacy import displacy

nlp = spacy.load("en_core_web_sm")

# Filter specific dependencies  
def filter_deps(doc):
    """Build displaCy arc dicts for *doc*, leaving out noisy dependencies.

    Arcs labelled ``punct``, ``det`` or ``aux`` are dropped. The remaining
    arcs follow the layout used for manual dependency data: ``start`` is
    always the smaller token index and ``end`` the larger, and ``dir`` is
    ``"left"`` when the dependent token precedes its head and ``"right"``
    when it follows it.

    Args:
        doc: Iterable of tokens exposing ``i``, ``dep_`` and ``head.i``
            (for example a spaCy ``Doc``).

    Returns:
        List of ``{"start", "end", "label", "dir"}`` dicts in token order.
    """
    skipped_labels = {"punct", "det", "aux"}
    filtered_deps = []
    for token in doc:
        head_index = token.head.i
        # A token that is its own head is the root: it has no incoming arc,
        # and emitting one would produce a zero-length arrow.
        if token.dep_ in skipped_labels or head_index == token.i:
            continue
        if token.i < head_index:
            start, end, direction = token.i, head_index, "left"
        else:
            start, end, direction = head_index, token.i, "right"
        filtered_deps.append({
            "start": start,
            "end": end,
            "label": token.dep_,
            "dir": direction,
        })
    return filtered_deps

doc = nlp("The quick brown fox jumps over the lazy dog")

# Assemble the manual input: the filtered arcs plus one text/tag entry per token
arcs = filter_deps(doc)
words = [
    {"text": token.text, "tag": token.pos_}
    for token in doc
]
manual_data = [{"words": words, "arcs": arcs}]

# Render the hand-built data as-is
html = displacy.render(manual_data, style="dep", manual=True)

Jupyter Notebook Integration

# Run these cells inside a Jupyter notebook
import spacy
from spacy import displacy

nlp = spacy.load("en_core_web_sm")
doc = nlp("Apple Inc. is looking at buying U.K. startup for $1 billion")

# With no ``jupyter`` argument the notebook is detected automatically and
# each visualization is displayed inline
for style in ("dep", "ent"):
    displacy.render(doc, style=style)

# jupyter=True forces inline display
displacy.render(doc, style="dep", jupyter=True)

# jupyter=False turns inline display off and returns the HTML as a string
html_string = displacy.render(doc, style="dep", jupyter=False)

Batch Visualization Processing

import spacy
from spacy import displacy
import os

nlp = spacy.load("en_core_web_sm")

texts = [
    "Apple Inc. announced new products.",
    "Google develops artificial intelligence.",
    "Microsoft partners with OpenAI.",
    "Tesla builds electric vehicles.",
    "Amazon operates cloud services."
]

# Process and visualize all documents
docs = list(nlp.pipe(texts))

# Create individual visualizations.
# jupyter=False makes render() return the markup as a string even inside a
# notebook; the explicit UTF-8 encoding keeps the written files independent
# of the platform's default encoding.
for i, doc in enumerate(docs):
    html = displacy.render(doc, style="ent", page=True, jupyter=False)
    filename = f"doc_{i}.html"
    with open(filename, "w", encoding="utf-8") as f:
        f.write(html)
    print(f"Saved {filename}")

# Create combined visualization
combined_html = displacy.render(docs, style="ent", page=True, jupyter=False)
with open("combined.html", "w", encoding="utf-8") as f:
    f.write(combined_html)

Export and Customization

import spacy
from spacy import displacy

nlp = spacy.load("en_core_web_sm")
doc = nlp("The CEO of Apple Inc. announced new products")

# Get raw visualization data (each call returns one dict for the document)
dep_data = displacy.parse_deps(doc)
ent_data = displacy.parse_ents(doc)

print("Dependency data:", dep_data)
print("Entity data:", ent_data)

# Modify data before rendering.
# parse_ents() returns a single dict, so the entity list is reached through
# its "ents" key directly; indexing with [0] would raise KeyError. Offsets
# are character positions in the text: [4:7] covers "CEO".
ent_data["ents"].append({
    "start": 4, "end": 7, "label": "TITLE"
})

# Render modified data
html = displacy.render(ent_data, style="ent", manual=True)

Install with Tessl CLI