Google Cloud Natural Language API client library providing sentiment analysis, entity recognition, text classification, and content moderation capabilities
—
Identifies and extracts named entities (people, places, organizations, events, etc.) from text, providing detailed information about each entity including type classification, salience scores, and mention locations within the text. Entity analysis is essential for information extraction, content understanding, and knowledge graph construction.
Identifies named entities in the provided text and returns detailed information about each entity found.
def analyze_entities(
    self,
    request: Optional[Union[AnalyzeEntitiesRequest, dict]] = None,
    *,
    document: Optional[Document] = None,
    encoding_type: Optional[EncodingType] = None,
    retry: OptionalRetry = gapic_v1.method.DEFAULT,
    timeout: Union[float, object] = gapic_v1.method.DEFAULT,
    metadata: Sequence[Tuple[str, Union[str, bytes]]] = (),
) -> AnalyzeEntitiesResponse:
    """Find named entities in the text and return information about them.

    Args:
        request: The request object containing the document and options.
        document: Input document for analysis.
        encoding_type: Text encoding type used for offset calculations.
        retry: Retry configuration for the request.
        timeout: Request timeout in seconds.
        metadata: Additional metadata to send with the request.

    Returns:
        AnalyzeEntitiesResponse containing the found entities and metadata.
    """

# Basic usage example:
from google.cloud import language
# Initialize the client once; it can be reused across requests.
client = language.LanguageServiceClient()

# Build a plain-text document to analyze.
document = language.Document(
    content="Google was founded by Larry Page and Sergey Brin in Mountain View, California.",
    type_=language.Document.Type.PLAIN_TEXT
)

# Call the Natural Language API.
response = client.analyze_entities(
    request={"document": document}
)

# Report each entity and every mention of it.
for entity in response.entities:
    print(f"Entity: {entity.name}")
    print(f"Type: {entity.type_.name}")
    print(f"Salience: {entity.salience}")
    print(f"Metadata: {dict(entity.metadata)}")
    for mention in entity.mentions:
        print(f"  Mention: '{mention.text.content}' ({mention.type_.name})")
print()class AnalyzeEntitiesRequest:
document: Document
encoding_type: EncodingTypeclass AnalyzeEntitiesResponse:
entities: MutableSequence[Entity]
language: strRepresents a named entity found in the text with comprehensive metadata.
class Entity:
class Type(proto.Enum):
UNKNOWN = 0
PERSON = 1
LOCATION = 2
ORGANIZATION = 3
EVENT = 4
WORK_OF_ART = 5
CONSUMER_GOOD = 6
OTHER = 7
PHONE_NUMBER = 9
ADDRESS = 10
DATE = 11
NUMBER = 12
PRICE = 13
name: str # Canonical name of the entity
type_: Type # Entity type classification
metadata: MutableMapping[str, str] # Additional metadata (Wikipedia URL, etc.)
salience: float # Salience/importance score [0.0, 1.0]
mentions: MutableSequence[EntityMention] # All mentions of this entity in text
sentiment: Sentiment # Overall sentiment for this entity (v1/v1beta2 only)Entity Types:
Represents a specific mention of an entity within the text.
class EntityMention:
    """Represents a specific mention of an entity within the text."""

    class Type(proto.Enum):
        """Whether the mention is a proper or common noun."""

        TYPE_UNKNOWN = 0
        PROPER = 1  # Proper noun (e.g., "Google")
        COMMON = 2  # Common noun (e.g., "company")

    text: TextSpan  # The mention text with position
    type_: Type  # Mention type (proper/common noun)
    sentiment: Sentiment  # Sentiment associated with this mention (v1/v1beta2 only)
probability: float  # Confidence score for this mention [0.0, 1.0]

def extract_entity_metadata(entities):
    """Summarize each entity into a plain dict.

    Args:
        entities: Iterable of Entity messages (objects exposing ``name``,
            ``type_``, ``salience``, ``mentions`` and a ``metadata`` mapping).

    Returns:
        list: One dict per entity with ``name``/``type``/``salience``/
        ``mentions_count``, plus ``wikipedia`` and ``knowledge_graph_id``
        when present in the entity's metadata.
    """
    processed = []
    for entity in entities:
        entity_info = {
            'name': entity.name,
            'type': entity.type_.name,
            'salience': entity.salience,
            'mentions_count': len(entity.mentions),
        }
        # Wikipedia URL and Knowledge Graph MID are optional metadata keys.
        if 'wikipedia_url' in entity.metadata:
            entity_info['wikipedia'] = entity.metadata['wikipedia_url']
        if 'mid' in entity.metadata:
            entity_info['knowledge_graph_id'] = entity.metadata['mid']
        processed.append(entity_info)
    return processed
# Usage: analyze the sample document and print the summarized metadata.
response = client.analyze_entities(request={"document": document})
metadata = extract_entity_metadata(response.entities)
for info in metadata:
    print(f"Entity: {info['name']} ({info['type']})")
    if 'wikipedia' in info:
        print(f"  Wikipedia: {info['wikipedia']}")
print(f" Salience: {info['salience']}")def filter_entities_by_type(entities, target_types):
"""Filter entities by specific types."""
from google.cloud.language import Entity
# Convert string types to enum values
type_mapping = {
'PERSON': Entity.Type.PERSON,
'LOCATION': Entity.Type.LOCATION,
'ORGANIZATION': Entity.Type.ORGANIZATION,
'EVENT': Entity.Type.EVENT,
'WORK_OF_ART': Entity.Type.WORK_OF_ART,
'CONSUMER_GOOD': Entity.Type.CONSUMER_GOOD,
'PHONE_NUMBER': Entity.Type.PHONE_NUMBER,
'ADDRESS': Entity.Type.ADDRESS,
'DATE': Entity.Type.DATE,
'NUMBER': Entity.Type.NUMBER,
'PRICE': Entity.Type.PRICE,
}
target_enum_types = [type_mapping[t] for t in target_types if t in type_mapping]
return [entity for entity in entities if entity.type_ in target_enum_types]
# Usage: keep only the people and organizations from the response.
response = client.analyze_entities(request={"document": document})
people_and_orgs = filter_entities_by_type(response.entities, ['PERSON', 'ORGANIZATION'])
for entity in people_and_orgs:
print(f"{entity.name} ({entity.type_.name})")def get_most_salient_entities(entities, limit=5):
"""Get the most important entities by salience score."""
sorted_entities = sorted(entities, key=lambda e: e.salience, reverse=True)
return sorted_entities[:limit]
# Usage
# Fetch entities for the sample document and list the three most salient.
response = client.analyze_entities(request={"document": document})
top_entities = get_most_salient_entities(response.entities, limit=3)
print("Most salient entities:")
for entity in top_entities:
print(f" {entity.name}: {entity.salience:.3f}")def analyze_entity_mentions(entity):
"""Analyze mentions of a specific entity."""
print(f"Entity: {entity.name}")
print(f"Total mentions: {len(entity.mentions)}")
proper_mentions = [m for m in entity.mentions if m.type_ == language.EntityMention.Type.PROPER]
common_mentions = [m for m in entity.mentions if m.type_ == language.EntityMention.Type.COMMON]
print(f"Proper noun mentions: {len(proper_mentions)}")
print(f"Common noun mentions: {len(common_mentions)}")
print("Mention details:")
for i, mention in enumerate(entity.mentions):
print(f" {i+1}. '{mention.text.content}' ({mention.type_.name})")
print(f" Position: {mention.text.begin_offset}")
print(f" Confidence: {mention.probability:.3f}")
# Usage
# Analyze the sample document, then inspect the first entity's mentions.
response = client.analyze_entities(request={"document": document})
if response.entities:
analyze_entity_mentions(response.entities[0])def process_long_document(client, long_text, chunk_size=4000):
"""Process long documents by chunking."""
import textwrap
# Split into chunks
chunks = textwrap.wrap(long_text, chunk_size, break_long_words=False)
all_entities = []
for i, chunk in enumerate(chunks):
print(f"Processing chunk {i+1}/{len(chunks)}")
document = language.Document(
content=chunk,
type_=language.Document.Type.PLAIN_TEXT
)
response = client.analyze_entities(request={"document": document})
all_entities.extend(response.entities)
# Deduplicate entities by name
unique_entities = {}
for entity in all_entities:
if entity.name not in unique_entities:
unique_entities[entity.name] = entity
else:
# Merge salience scores (take maximum)
if entity.salience > unique_entities[entity.name].salience:
unique_entities[entity.name] = entity
return list(unique_entities.values())# Process HTML content to extract entities
html_content = """
<html>
<body>
<h1>Tech News</h1>
<p>Apple announced new features at their conference in Cupertino.
CEO Tim Cook presented the latest innovations to the audience.</p>
</body>
</html>
"""
# Document.Type.HTML tells the API the content is HTML rather than plain text.
document = language.Document(
    content=html_content,
    type_=language.Document.Type.HTML
)
response = client.analyze_entities(request={"document": document})
for entity in response.entities:
    print(f"Entity: {entity.name} ({entity.type_.name})")

from google.api_core import exceptions
try:
response = client.analyze_entities(
request={"document": document},
timeout=15.0
)
except exceptions.InvalidArgument as e:
print(f"Invalid document format: {e}")
except exceptions.ResourceExhausted:
print("API quota exceeded")
except exceptions.GoogleAPIError as e:
print(f"API error: {e}")Install with Tessl CLI
npx tessl i tessl/pypi-google-cloud-language