Industrial-strength Natural Language Processing (NLP) in Python
—
Interactive visualization tools for displaying linguistic analysis including dependency trees, named entities, and custom visualizations. spaCy's displaCy visualizer renders analysis results in HTML/SVG format for web browsers and Jupyter notebooks.
Core functions for generating HTML/SVG visualizations of linguistic analysis.
def render(docs: Union[Doc, Span, List[Doc]], style: str = "dep",
           page: bool = False, minify: bool = False,
           jupyter: Optional[bool] = None, options: Optional[dict] = None,
           manual: bool = False) -> str:
    """
    Render a visualization of linguistic analysis.

    Args:
        docs: Doc(s) or Span(s) to visualize. When manual is True, pass
            pre-built visualization data (dicts) instead of Doc objects.
        style: Visualization style ('dep' or 'ent')
        page: Render as full HTML page
        minify: Minify HTML output
        jupyter: Override Jupyter notebook detection (None means
            auto-detect)
        options: Visualization options and settings
        manual: Don't parse Doc, use manual data

    Returns:
        HTML/SVG string of the visualization
    """
def serve(docs: Union[Doc, List[Doc]], style: str = "dep",
          port: int = 5000, host: str = "0.0.0.0",
          options: Optional[dict] = None, manual: bool = False) -> None:
    """
    Serve visualizations on a web server.

    Args:
        docs: Doc(s) to visualize
        style: Visualization style ('dep' or 'ent')
        port: Port number for web server
        host: Host address to bind to (the default "0.0.0.0" listens on
            all network interfaces)
        options: Visualization options and settings
        manual: Don't parse Doc, use manual data
    """

Functions to extract visualization data from spaCy objects.
def parse_deps(doc: Doc, options: Optional[dict] = None) -> dict:
    """
    Parse a Doc object for dependency visualization.

    Args:
        doc: The Doc object to parse
        options: Visualization options

    Returns:
        Dictionary with dependency visualization data (presumably the
        same "words"/"arcs" layout accepted in manual mode -- confirm
        against the spaCy API reference)
    """
def parse_ents(doc: Doc, options: Optional[dict] = None) -> dict:
    """
    Parse a Doc object for entity visualization.

    Args:
        doc: The Doc object to parse
        options: Visualization options

    Returns:
        Dictionary with entity visualization data (presumably the same
        "text"/"ents" layout accepted in manual mode -- confirm against
        the spaCy API reference)
    """

Classes that handle the actual rendering of visualizations.
class DependencyRenderer:
    """Renderer for dependency parse visualizations."""

    # Style identifier of this renderer; same value as the 'dep' style
    # name used by render() and serve().
    style: str = "dep"

    def __init__(self, options: Optional[dict] = None) -> None:
        """Initialize the dependency renderer.

        Args:
            options: Visualization options and settings
        """

    def render(self, parsed: List[dict], page: bool = False,
               minify: bool = False) -> str:
        """
        Render dependency visualization.

        Args:
            parsed: Parsed dependency data
            page: Render as full HTML page
            minify: Minify HTML output

        Returns:
            HTML/SVG string
        """
class EntityRenderer:
    """Renderer for named entity visualizations."""

    # Style identifier of this renderer; same value as the 'ent' style
    # name used by render() and serve().
    style: str = "ent"

    def __init__(self, options: Optional[dict] = None) -> None:
        """Initialize the entity renderer.

        Args:
            options: Visualization options and settings
        """

    def render(self, parsed: List[dict], page: bool = False,
               minify: bool = False) -> str:
        """
        Render entity visualization.

        Args:
            parsed: Parsed entity data
            page: Render as full HTML page
            minify: Minify HTML output

        Returns:
            HTML/SVG string
        """

Configuration options for dependency tree visualizations:
dep_options = {
# Appearance
"compact": False, # Use compact arrow style
"bg": "#ffffff", # Background color
"color": "#000000", # Text color
"font": "Arial", # Font family
"distance": 175, # Distance between tokens
"arrow_stroke": 2, # Arrow line width
"arrow_width": 10, # Arrow head width
"arrow_spacing": 20, # Spacing between arrows
"word_spacing": 45, # Spacing between words
"collapse_punct": True, # Collapse punctuation arcs
"collapse_phrases": False, # Collapse noun phrases
# Filtering
"fine_grained": False, # Use fine-grained POS tags
"add_lemma": False, # Show lemmas below tokens
"collapse_punct": True, # Collapse punctuation dependencies
}Configuration options for named entity visualizations:
# Reference listing of the options accepted by the named entity ("ent")
# visualizer.
ent_options = {
    # Entity types and colors
    "ents": ["PERSON", "ORG", "GPE", "MONEY"],  # Entity types to show
    "colors": {  # Custom colors for entity types (label -> hex color)
        "PERSON": "#aa9cfc",
        "ORG": "#7aecec",
        "GPE": "#bfefff",
        "MONEY": "#e4e7d2"
    },
    # Appearance
    "template": None,         # Custom HTML template
    "kb_url_template": None,  # Knowledge base URL template
}

import spacy
from spacy import displacy
nlp = spacy.load("en_core_web_sm")
doc = nlp("Apple is looking at buying U.K. startup for $1 billion")
# Render dependency visualization
html = displacy.render(doc, style="dep", jupyter=False)
print(html)
# Display in Jupyter notebook
displacy.render(doc, style="dep", jupyter=True)
# Save to file
with open("dependency.html", "w") as f:
    f.write(displacy.render(doc, style="dep", page=True))

import spacy
from spacy import displacy
nlp = spacy.load("en_core_web_sm")
doc = nlp("Apple Inc. is looking at buying U.K. startup for $1 billion")
# Render entity visualization
html = displacy.render(doc, style="ent", jupyter=False)
print(html)
# Custom entity colors
colors = {"ORG": "#85C1E9", "GPE": "#F8C471", "MONEY": "#82E0AA"}
options = {"ents": ["ORG", "GPE", "MONEY"], "colors": colors}
html = displacy.render(doc, style="ent", options=options)
print(html)

import spacy
from spacy import displacy
nlp = spacy.load("en_core_web_sm")
texts = [
"Apple Inc. is an American technology company.",
"Google was founded by Larry Page and Sergey Brin.",
"Microsoft Corporation is based in Redmond, Washington."
]
docs = [nlp(text) for text in texts]
# Visualize multiple documents
html = displacy.render(docs, style="ent", page=True)
# Save multi-document visualization
with open("multiple_docs.html", "w") as f:
    f.write(html)

import spacy
from spacy import displacy
nlp = spacy.load("en_core_web_sm")
docs = [nlp("Apple Inc. is buying a startup for $1 billion.")]
# Start visualization server
# This starts a web server; open http://localhost:5000 in a browser to view it.
# The call blocks until the server is stopped (e.g. with Ctrl+C).
displacy.serve(docs, style="dep", port=5000)
# Serve on specific host and port
displacy.serve(docs, style="ent", host="127.0.0.1", port=8000)

import spacy
from spacy import displacy
nlp = spacy.load("en_core_web_sm")
doc = nlp("The quick brown fox jumps over the lazy dog")
# Custom dependency visualization options
dep_options = {
"compact": True,
"bg": "#f9f9f9",
"color": "#333333",
"font": "Helvetica",
"distance": 200,
"arrow_stroke": 3,
"arrow_width": 12,
"fine_grained": True,
"add_lemma": True
}
html = displacy.render(doc, style="dep", options=dep_options)
# Custom entity visualization options
doc2 = nlp("Apple Inc. CEO Tim Cook visited London, England")
ent_options = {
"ents": ["PERSON", "ORG", "GPE"],
"colors": {
"PERSON": "#ff6b6b",
"ORG": "#4ecdc4",
"GPE": "#45b7d1"
}
}
html = displacy.render(doc2, style="ent", options=ent_options)

from spacy import displacy
# Manual dependency data
manual_dep_data = [
{
"words": [
{"text": "Apple", "tag": "NNP"},
{"text": "is", "tag": "VBZ"},
{"text": "great", "tag": "JJ"}
],
"arcs": [
{"start": 0, "end": 1, "label": "nsubj", "dir": "left"},
{"start": 1, "end": 2, "label": "acomp", "dir": "right"}
]
}
]
html = displacy.render(manual_dep_data, style="dep", manual=True)
# Manual entity data
# Manual-mode entity data: one dict per document.
# "start"/"end" are character offsets into "text" (end is exclusive), so
# text[start:end] must reproduce exactly the entity's surface string.
manual_ent_data = [
    {
        "text": "Apple Inc. is a technology company in Cupertino.",
        "ents": [
            {"start": 0, "end": 10, "label": "ORG"},   # "Apple Inc."
            {"start": 38, "end": 47, "label": "GPE"},  # "Cupertino"
        ]
    }
]
html = displacy.render(manual_ent_data, style="ent", manual=True)

import spacy
from spacy import displacy
nlp = spacy.load("en_core_web_sm")
# Filter specific dependencies
def filter_deps(doc):
    """Build displaCy arc dicts for *doc*, skipping unwanted dependencies.

    Tokens whose dependency label is "punct", "det" or "aux" are left out,
    and so is any token that is its own head (the sentence root), because
    it has no arc to draw.

    Args:
        doc: Iterable of tokens exposing ``i``, ``dep_`` and ``head``.

    Returns:
        List of arc dicts with "start", "end", "label" and "dir" keys.
        "start" is always the smaller token index. "dir" is "left" when
        the dependent precedes its head and "right" when it follows it.
    """
    excluded = {"punct", "det", "aux"}
    filtered_deps = []
    for token in doc:
        head_index = token.head.i
        if token.dep_ in excluded or token.i == head_index:
            continue
        filtered_deps.append({
            # Arcs are drawn from the lower to the higher token index.
            "start": min(token.i, head_index),
            "end": max(token.i, head_index),
            "label": token.dep_,
            # The arrow head points at the dependent token.
            "dir": "left" if token.i < head_index else "right",
        })
    return filtered_deps
doc = nlp("The quick brown fox jumps over the lazy dog")
# Create custom visualization data
words = [{"text": token.text, "tag": token.pos_} for token in doc]
arcs = filter_deps(doc)
manual_data = [{"words": words, "arcs": arcs}]
html = displacy.render(manual_data, style="dep", manual=True)

# In Jupyter notebook
import spacy
from spacy import displacy
nlp = spacy.load("en_core_web_sm")
doc = nlp("Apple Inc. is looking at buying U.K. startup for $1 billion")
# Auto-detects Jupyter and displays inline
displacy.render(doc, style="dep")
displacy.render(doc, style="ent")
# Force Jupyter rendering
displacy.render(doc, style="dep", jupyter=True)
# Explicitly disable Jupyter for HTML string
html_string = displacy.render(doc, style="dep", jupyter=False)

import spacy
from spacy import displacy
import os
nlp = spacy.load("en_core_web_sm")
texts = [
"Apple Inc. announced new products.",
"Google develops artificial intelligence.",
"Microsoft partners with OpenAI.",
"Tesla builds electric vehicles.",
"Amazon operates cloud services."
]
# Process and visualize all documents
docs = list(nlp.pipe(texts))
# Create individual visualizations
# Write one standalone HTML page per document (doc_0.html, doc_1.html, ...).
for i, doc in enumerate(docs):
    html = displacy.render(doc, style="ent", page=True)
    filename = f"doc_{i}.html"
    with open(filename, "w") as f:
        f.write(html)
    print(f"Saved {filename}")
# Create combined visualization
combined_html = displacy.render(docs, style="ent", page=True)
with open("combined.html", "w") as f:
    f.write(combined_html)

import spacy
from spacy import displacy
nlp = spacy.load("en_core_web_sm")
doc = nlp("The CEO of Apple Inc. announced new products")
# Get raw visualization data
dep_data = displacy.parse_deps(doc)
ent_data = displacy.parse_ents(doc)
print("Dependency data:", dep_data)
print("Entity data:", ent_data)
# Modify data before rendering
# parse_ents() returns a single dict, so the entity list is reached by key
# rather than by list index. Offsets are character positions into the
# text: "CEO" occupies characters 4-7 of "The CEO of Apple Inc. ...".
ent_data["ents"].append({
    "start": 4, "end": 7, "label": "TITLE"
})
# Render modified data
html = displacy.render(ent_data, style="ent", manual=True)

Install with Tessl CLI
npx tessl i tessl/pypi-spacy