Google Cloud Natural Language API client library providing sentiment analysis, entity recognition, text classification, and content moderation capabilities
—
Performs multiple types of analysis in a single API call: sentiment, entity, syntax, classification, and moderation results can all be returned together. Bundling features this way cuts round trips, improves throughput, and yields a complete picture of the text in one operation.
Performs comprehensive text analysis including sentiment, entities, syntax (v1/v1beta2), classification, and moderation, depending on the specified features.
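For instance, sentiment and entity analysis that would otherwise take two round trips can be fetched together; a minimal sketch of the pattern (the full examples below use the same setup):

from google.cloud import language

client = language.LanguageServiceClient()
document = language.Document(
    content="The new release is fast and reliable.",
    type_=language.Document.Type.PLAIN_TEXT
)

# Two round trips: one call per analysis type
sentiment_only = client.analyze_sentiment(request={"document": document})
entities_only = client.analyze_entities(request={"document": document})

# One round trip: both analyses via annotate_text
features = language.AnnotateTextRequest.Features(
    extract_document_sentiment=True,
    extract_entities=True
)
combined = client.annotate_text(
    request={"document": document, "features": features}
)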
def annotate_text(
self,
request: Optional[Union[AnnotateTextRequest, dict]] = None,
*,
document: Optional[Document] = None,
features: Optional[AnnotateTextRequest.Features] = None,
encoding_type: Optional[EncodingType] = None,
retry: OptionalRetry = gapic_v1.method.DEFAULT,
timeout: Union[float, object] = gapic_v1.method.DEFAULT,
metadata: Sequence[Tuple[str, Union[str, bytes]]] = ()
) -> AnnotateTextResponse:
"""
Provides comprehensive text analysis in a single request.
Args:
request: The request object containing document, features, and options
document: Input document for analysis
features: Features to extract (sentiment, entities, syntax, etc.)
encoding_type: Text encoding type for offset calculations
retry: Retry configuration for the request
timeout: Request timeout in seconds
metadata: Additional metadata to send with the request
Returns:
AnnotateTextResponse containing all requested analysis results
"""from google.cloud import language
# Initialize client
client = language.LanguageServiceClient()
# Create document
document = language.Document(
content="""
Google is an amazing technology company founded by Larry Page and Sergey Brin.
They have revolutionized internet search and continue to innovate in artificial
intelligence and cloud computing. I'm really impressed with their latest products!
""",
type_=language.Document.Type.PLAIN_TEXT
)
# Configure features to extract
features = language.AnnotateTextRequest.Features(
extract_sentiment=True,
extract_entities=True,
extract_document_sentiment=True,
extract_entity_sentiment=True, # v1/v1beta2 only
extract_syntax=True, # v1/v1beta2 only
classify_text=True,
moderate_text=True
)
# Perform comprehensive analysis
response = client.annotate_text(
request={
"document": document,
"features": features,
"encoding_type": language.EncodingType.UTF8
}
)
# Process all results
print("=== COMPREHENSIVE TEXT ANALYSIS ===\n")
# Document sentiment
if response.document_sentiment:
print(f"Document Sentiment: {response.document_sentiment.score:.2f} (magnitude: {response.document_sentiment.magnitude:.2f})")
# Entities
if response.entities:
print(f"\nEntities Found: {len(response.entities)}")
for entity in response.entities[:3]: # Show top 3
print(f" - {entity.name} ({entity.type_.name}): salience {entity.salience:.2f}")
# Sentences with sentiment
if response.sentences:
print(f"\nSentences: {len(response.sentences)}")
for i, sentence in enumerate(response.sentences):
print(f" {i+1}. {sentence.text.content}")
if sentence.sentiment:
print(f" Sentiment: {sentence.sentiment.score:.2f}")
# Classification
if response.categories:
print(f"\nClassification Categories:")
for category in response.categories:
print(f" - {category.name}: {category.confidence:.2f}")
# Moderation
if response.moderation_categories:
print(f"\nModeration Results:")
flagged = [cat for cat in response.moderation_categories if cat.confidence > 0.5]
if flagged:
for cat in flagged:
print(f" ⚠️ {cat.name}: {cat.confidence:.2f}")
else:
print(" ✅ Content appears safe")
print(f"\nLanguage detected: {response.language}")class AnnotateTextRequest:

class AnnotateTextRequest:
    document: Document
features: Features
    encoding_type: EncodingType

Configuration for which analysis features to include.
class Features:
extract_syntax: bool # Extract syntax info (v1/v1beta2 only)
extract_entities: bool # Extract entities
extract_document_sentiment: bool # Extract document sentiment
extract_entity_sentiment: bool # Extract entity sentiment (v1/v1beta2 only)
extract_sentiment: bool # Extract sentence-level sentiment
classify_text: bool # Classify text into categories
moderate_text: bool # Moderate content for safety
    classification_model_options: ClassificationModelOptions  # v1/v1beta2 only
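The classification_model_options field (v1/v1beta2) selects which model backs classify_text; a minimal sketch, assuming the v1 ClassificationModelOptions message and its V2Model variant:

# Classify with the v2 content categories taxonomy (v1/v1beta2 only)
features = language.AnnotateTextRequest.Features(
    classify_text=True,
    classification_model_options=language.ClassificationModelOptions(
        v2_model=language.ClassificationModelOptions.V2Model(
            content_categories_version=(
                language.ClassificationModelOptions.V2Model.ContentCategoriesVersion.V2
            )
        )
    )
)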

Comprehensive response containing all requested analysis results.
class AnnotateTextResponse:
sentences: MutableSequence[Sentence] # Sentences with sentiment
tokens: MutableSequence[Token] # Tokens with syntax info (v1/v1beta2)
entities: MutableSequence[Entity] # Entities found
document_sentiment: Sentiment # Overall document sentiment
language: str # Detected language
categories: MutableSequence[ClassificationCategory] # Classification results
    moderation_categories: MutableSequence[ClassificationCategory]  # Moderation results

def comprehensive_content_analysis(client, text, api_version='v1'):
"""Perform complete analysis of text content."""
    if api_version == 'v2':
        # v2 has limited features and uses its own client and message types
        from google.cloud import language_v2
        lang = language_v2
        client = language_v2.LanguageServiceClient()
        features = language_v2.AnnotateTextRequest.Features(
            extract_sentiment=True,
            extract_entities=True,
            extract_document_sentiment=True,
            classify_text=True,
            moderate_text=True
        )
    else:
        # v1/v1beta2 have the full feature set
        lang = language
        features = language.AnnotateTextRequest.Features(
            extract_syntax=True,
            extract_entities=True,
            extract_document_sentiment=True,
            extract_entity_sentiment=True,
            extract_sentiment=True,
            classify_text=True,
            moderate_text=True
        )
    # Build the document and request with types from the selected API version
    document = lang.Document(
        content=text,
        type_=lang.Document.Type.PLAIN_TEXT
    )
    response = client.annotate_text(
        request={
            "document": document,
            "features": features,
            "encoding_type": lang.EncodingType.UTF8
        }
    )
# Process and structure results
analysis = {
'text': text,
'language': response.language,
'word_count': len(text.split()),
'character_count': len(text)
}
# Document sentiment
if response.document_sentiment:
analysis['document_sentiment'] = {
'score': response.document_sentiment.score,
'magnitude': response.document_sentiment.magnitude,
'label': get_sentiment_label(response.document_sentiment.score)
}
# Entities
if response.entities:
analysis['entities'] = []
for entity in response.entities:
entity_data = {
'name': entity.name,
'type': entity.type_.name,
'salience': entity.salience,
'mentions': len(entity.mentions)
}
# Add entity sentiment if available
if hasattr(entity, 'sentiment') and entity.sentiment:
entity_data['sentiment'] = {
'score': entity.sentiment.score,
'magnitude': entity.sentiment.magnitude
}
analysis['entities'].append(entity_data)
# Classification
if response.categories:
analysis['categories'] = [
{'name': cat.name, 'confidence': cat.confidence}
for cat in response.categories
]
# Moderation
if response.moderation_categories:
analysis['moderation'] = {
'safe': all(cat.confidence < 0.5 for cat in response.moderation_categories),
'categories': [
{'name': cat.name, 'confidence': cat.confidence}
for cat in response.moderation_categories if cat.confidence > 0.1
]
}
# Sentence analysis
if response.sentences:
analysis['sentences'] = []
for sentence in response.sentences:
sentence_data = {
'text': sentence.text.content,
'word_count': len(sentence.text.content.split())
}
if sentence.sentiment:
sentence_data['sentiment'] = {
'score': sentence.sentiment.score,
'magnitude': sentence.sentiment.magnitude
}
analysis['sentences'].append(sentence_data)
# Syntax analysis (v1/v1beta2 only)
if response.tokens:
pos_counts = {}
for token in response.tokens:
pos = token.part_of_speech.tag.name
pos_counts[pos] = pos_counts.get(pos, 0) + 1
analysis['syntax'] = {
'token_count': len(response.tokens),
'pos_distribution': pos_counts,
'complexity_score': calculate_complexity_score(response.tokens)
}
return analysis
def get_sentiment_label(score):
"""Convert sentiment score to human-readable label."""
if score >= 0.25:
return 'Positive'
elif score <= -0.25:
return 'Negative'
else:
return 'Neutral'
def calculate_complexity_score(tokens):
"""Calculate text complexity based on syntax."""
if not tokens:
return 0
complex_pos = ['ADJ', 'ADV', 'VERB']
complex_tokens = sum(1 for token in tokens if token.part_of_speech.tag.name in complex_pos)
return complex_tokens / len(tokens)
# Usage
text = """
Apple Inc. is a fantastic American technology company that designs and develops
consumer electronics, software, and online services. Founded by Steve Jobs,
Steve Wozniak, and Ronald Wayne in 1976, the company has become one of the
world's most valuable companies. I absolutely love their innovative products
and exceptional design philosophy!
"""
analysis = comprehensive_content_analysis(client, text)
print("Comprehensive Content Analysis:")
print(f"Language: {analysis['language']}")
print(f"Words: {analysis['word_count']}, Characters: {analysis['character_count']}")
if 'document_sentiment' in analysis:
sent = analysis['document_sentiment']
print(f"Overall Sentiment: {sent['label']} (score: {sent['score']:.2f})")
if 'entities' in analysis:
print(f"Entities: {len(analysis['entities'])}")
for entity in analysis['entities'][:3]:
print(f" - {entity['name']} ({entity['type']})")
if 'categories' in analysis:
print("Top Categories:")
for cat in analysis['categories'][:2]:
print(f" - {cat['name']}: {cat['confidence']:.2f}")
if 'moderation' in analysis:
print(f"Content Safety: {'Safe' if analysis['moderation']['safe'] else 'Flagged'}")def analyze_with_specific_features(client, text, feature_set='basic'):
"""Perform analysis with predefined feature sets."""
feature_sets = {
'basic': language.AnnotateTextRequest.Features(
extract_document_sentiment=True,
extract_entities=True
),
'sentiment_focus': language.AnnotateTextRequest.Features(
extract_document_sentiment=True,
extract_sentiment=True,
extract_entity_sentiment=True # v1/v1beta2 only
),
'safety_focus': language.AnnotateTextRequest.Features(
moderate_text=True,
classify_text=True,
extract_document_sentiment=True
),
'linguistic': language.AnnotateTextRequest.Features(
extract_syntax=True, # v1/v1beta2 only
extract_entities=True,
extract_sentiment=True
),
'complete': language.AnnotateTextRequest.Features(
extract_syntax=True, # v1/v1beta2 only
extract_entities=True,
extract_document_sentiment=True,
extract_entity_sentiment=True, # v1/v1beta2 only
extract_sentiment=True,
classify_text=True,
moderate_text=True
)
}
features = feature_sets.get(feature_set, feature_sets['basic'])
document = language.Document(
content=text,
type_=language.Document.Type.PLAIN_TEXT
)
response = client.annotate_text(
request={
"document": document,
"features": features
}
)
return response
# Usage examples
text = "Google's new AI technology is impressive but raises some privacy concerns."
# Basic analysis
basic_response = analyze_with_specific_features(client, text, 'basic')
print("Basic Analysis:")
print(f" Sentiment: {basic_response.document_sentiment.score:.2f}")
print(f" Entities: {len(basic_response.entities)}")
# Safety-focused analysis
safety_response = analyze_with_specific_features(client, text, 'safety_focus')
print("\nSafety Analysis:")
if safety_response.moderation_categories:
flagged = [cat for cat in safety_response.moderation_categories if cat.confidence > 0.3]
print(f" Safety issues: {len(flagged)}")
if safety_response.categories:
print(f" Categories: {len(safety_response.categories)}")def batch_comprehensive_analysis(client, texts, max_workers=3):
"""Perform comprehensive analysis on multiple texts concurrently."""
import concurrent.futures
def analyze_single_text(text):
features = language.AnnotateTextRequest.Features(
extract_document_sentiment=True,
extract_entities=True,
extract_sentiment=True,
classify_text=True,
moderate_text=True
)
document = language.Document(
content=text,
type_=language.Document.Type.PLAIN_TEXT
)
try:
response = client.annotate_text(
request={
"document": document,
"features": features
}
)
return {
'text': text[:100] + "..." if len(text) > 100 else text,
'success': True,
'sentiment_score': response.document_sentiment.score if response.document_sentiment else None,
'entity_count': len(response.entities),
'category_count': len(response.categories),
'safe': all(cat.confidence < 0.5 for cat in response.moderation_categories) if response.moderation_categories else True,
'language': response.language
}
except Exception as e:
return {
'text': text[:100] + "..." if len(text) > 100 else text,
'success': False,
'error': str(e)
}
results = []
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
future_to_text = {executor.submit(analyze_single_text, text): text for text in texts}
for future in concurrent.futures.as_completed(future_to_text):
result = future.result()
results.append(result)
return results
# Usage
texts = [
"This product is absolutely amazing and I love using it every day!",
"The customer service was terrible and the staff was very rude.",
"Apple announced new features for their latest iPhone model.",
"The weather is nice today and perfect for outdoor activities.",
"I'm concerned about privacy issues with social media platforms."
]
batch_results = batch_comprehensive_analysis(client, texts)
print("Batch Analysis Results:")
for i, result in enumerate(batch_results, 1):
print(f"{i}. {result['text']}")
if result['success']:
print(f" Sentiment: {result['sentiment_score']:.2f}")
print(f" Entities: {result['entity_count']}")
print(f" Categories: {result['category_count']}")
print(f" Safe: {result['safe']}")
print(f" Language: {result['language']}")
else:
print(f" Error: {result['error']}")
    print()

def optimized_analysis_pipeline(client, texts, chunk_size=10):
"""Optimized pipeline for processing large volumes of text."""
import time
results = []
total_texts = len(texts)
# Process in chunks to manage memory and API limits
for i in range(0, total_texts, chunk_size):
chunk = texts[i:i + chunk_size]
chunk_start_time = time.time()
print(f"Processing chunk {i//chunk_size + 1}/{(total_texts-1)//chunk_size + 1}")
for text in chunk:
# Use minimal features for faster processing
features = language.AnnotateTextRequest.Features(
extract_document_sentiment=True,
extract_entities=True,
classify_text=True
)
document = language.Document(
content=text,
type_=language.Document.Type.PLAIN_TEXT
)
try:
response = client.annotate_text(
request={
"document": document,
"features": features
},
                    timeout=10.0  # fail fast rather than hang on a slow request
)
# Extract key metrics only
result = {
'sentiment': response.document_sentiment.score if response.document_sentiment else 0,
'entity_count': len(response.entities),
'top_category': response.categories[0].name if response.categories else None,
'language': response.language
}
results.append(result)
except Exception as e:
results.append({
'error': str(e),
'sentiment': 0,
'entity_count': 0,
'top_category': None,
'language': 'unknown'
})
chunk_time = time.time() - chunk_start_time
print(f" Processed {len(chunk)} texts in {chunk_time:.2f} seconds")
# Brief pause between chunks
time.sleep(0.1)
return results
# Usage for high-volume processing
# large_text_collection = [...] # Your large collection of texts
# optimized_results = optimized_analysis_pipeline(client, large_text_collection)

from google.api_core import exceptions

try:
response = client.annotate_text(
request={
"document": document,
"features": features
},
timeout=30.0
)
except exceptions.InvalidArgument as e:
print(f"Invalid request: {e}")
# Check document content and feature configuration
except exceptions.DeadlineExceeded:
print("Request timed out - consider reducing features or text length")
except exceptions.ResourceExhausted:
print("API quota exceeded")
except exceptions.FailedPrecondition as e:
print(f"Feature not available in this API version: {e}")
except exceptions.GoogleAPIError as e:
print(f"API error: {e}")Install with Tessl CLI
Install with Tessl CLI
npx tessl i tessl/pypi-google-cloud-language