Google Cloud Natural Language API client library providing sentiment analysis, entity recognition, text classification, and content moderation capabilities
---
Provides comprehensive linguistic analysis including part-of-speech tagging, dependency parsing, morphological analysis, and token-level information to understand the grammatical structure and linguistic properties of text. Essential for applications requiring deep language understanding, grammar checking, and linguistic research.
Note: This feature is only available in API versions v1 and v1beta2. It is not included in the simplified v2 API.
Performs detailed syntactic analysis of the provided text, returning information about sentences, tokens, part-of-speech tags, and dependency relationships.
def analyze_syntax(
    self,
    request: Optional[Union[AnalyzeSyntaxRequest, dict]] = None,
    *,
    document: Optional[Document] = None,
    encoding_type: Optional[EncodingType] = None,
    retry: OptionalRetry = gapic_v1.method.DEFAULT,
    timeout: Union[float, object] = gapic_v1.method.DEFAULT,
    metadata: Sequence[Tuple[str, Union[str, bytes]]] = (),
) -> AnalyzeSyntaxResponse:
    """Analyze the syntax of the text.

    Provides part-of-speech tagging, dependency parsing, and other
    linguistic information. Only available in API versions v1 and
    v1beta2 (not in the simplified v2 API).

    Args:
        request: The request object containing document and options.
        document: Input document for analysis.
        encoding_type: Text encoding type for offset calculations.
        retry: Retry configuration for the request.
        timeout: Request timeout in seconds.
        metadata: Additional metadata to send with the request.

    Returns:
        AnalyzeSyntaxResponse containing linguistic analysis results.
    """
    ...

from google.cloud import language_v1  # Use v1 or v1beta2
# Initialize client (must use v1 or v1beta2)
client = language_v1.LanguageServiceClient()
# Create document
document = language_v1.Document(
content="The quick brown fox jumps over the lazy dog.",
type_=language_v1.Document.Type.PLAIN_TEXT
)
# Analyze syntax
response = client.analyze_syntax(
request={"document": document}
)
# Process sentences
print("Sentences:")
for i, sentence in enumerate(response.sentences):
print(f"{i+1}. {sentence.text.content}")
print("\nTokens with POS tags:")
for token in response.tokens:
print(f"'{token.text.content}' - {token.part_of_speech.tag.name}")
print("\nDependency relationships:")
for i, token in enumerate(response.tokens):
if token.dependency_edge.head_token_index != i: # Not the root
head_token = response.tokens[token.dependency_edge.head_token_index]
print(f"'{token.text.content}' --{token.dependency_edge.label.name}--> '{head_token.text.content}'")class AnalyzeSyntaxRequest:
document: Document
encoding_type: EncodingTypeclass AnalyzeSyntaxResponse:
sentences: MutableSequence[Sentence]
tokens: MutableSequence[Token]
language: strRepresents a linguistic token with comprehensive morphological and syntactic information.
class Token:
text: TextSpan # Token text and position
part_of_speech: PartOfSpeech # Part-of-speech information
dependency_edge: DependencyEdge # Dependency relationship
lemma: str # Canonical form of the tokenComprehensive part-of-speech and morphological information.
class PartOfSpeech:
class Tag(proto.Enum):
UNKNOWN = 0
ADJ = 1 # Adjective
ADP = 2 # Adposition (preposition/postposition)
ADV = 3 # Adverb
CONJ = 4 # Conjunction
DET = 5 # Determiner
NOUN = 6 # Noun
NUM = 7 # Numeral
PRON = 8 # Pronoun
PRT = 9 # Particle
PUNCT = 10 # Punctuation
VERB = 11 # Verb
X = 12 # Other/Unknown
AFFIX = 13 # Affix
class Aspect(proto.Enum):
ASPECT_UNKNOWN = 0
PERFECTIVE = 1
IMPERFECTIVE = 2
PROGRESSIVE = 3
class Case(proto.Enum):
CASE_UNKNOWN = 0
ACCUSATIVE = 1
ADVERBIAL = 2
COMPLEMENTIVE = 3
DATIVE = 4
GENITIVE = 5
INSTRUMENTAL = 6
LOCATIVE = 7
NOMINATIVE = 8
OBLIQUE = 9
PARTITIVE = 10
PREPOSITIONAL = 11
REFLEXIVE_CASE = 12
RELATIVE_CASE = 13
VOCATIVE = 14
# Additional enums for Form, Gender, Mood, Number, Person, Proper, Reciprocity, Tense, Voice
tag: Tag # Main part-of-speech tag
aspect: Aspect # Verbal aspect
case: Case # Grammatical case
form: Form # Word form
gender: Gender # Grammatical gender
mood: Mood # Grammatical mood
number: Number # Grammatical number
person: Person # Grammatical person
proper: Proper # Proper noun indicator
reciprocity: Reciprocity # Reciprocity
tense: Tense # Grammatical tense
voice: Voice # Grammatical voiceRepresents a dependency relationship between tokens in the parse tree.
class DependencyEdge:
class Label(proto.Enum):
UNKNOWN = 0
ABBREV = 1 # Abbreviation modifier
ACOMP = 2 # Adjectival complement
ADVCL = 3 # Adverbial clause modifier
ADVMOD = 4 # Adverbial modifier
AMOD = 5 # Adjectival modifier
APPOS = 6 # Appositional modifier
ATTR = 7 # Attribute
AUX = 8 # Auxiliary
AUXPASS = 9 # Passive auxiliary
CC = 10 # Coordinating conjunction
CCOMP = 11 # Clausal complement
CONJ = 12 # Conjunct
CSUBJ = 13 # Clausal subject
CSUBJPASS = 14 # Clausal passive subject
DEP = 15 # Dependent
DET = 16 # Determiner
DISCOURSE = 17 # Discourse element
DOBJ = 18 # Direct object
EXPL = 19 # Expletive
GOESWITH = 20 # Goes with
IOBJ = 21 # Indirect object
MARK = 22 # Marker
MWE = 23 # Multi-word expression
MWV = 24 # Multi-word verbal expression
NEG = 25 # Negation modifier
NN = 26 # Noun compound modifier
NPADVMOD = 27 # Noun phrase adverbial modifier
NSUBJ = 28 # Nominal subject
NSUBJPASS = 29 # Passive nominal subject
NUM = 30 # Numeric modifier
NUMBER = 31 # Element of compound number
P = 32 # Punctuation mark
PARATAXIS = 33 # Parataxis
PARTMOD = 34 # Participial modifier
PCOMP = 35 # Prepositional complement
POBJ = 36 # Object of preposition
POSS = 37 # Possession modifier
POSTNEG = 38 # Postverbal negative particle
PRECOMP = 39 # Predicate complement
PRECONJ = 40 # Preconjunct
PREDET = 41 # Predeterminer
PREF = 42 # Prefix
PREP = 43 # Prepositional modifier
PRONL = 44 # Pronominal locative
PRT = 45 # Particle
PS = 46 # Possessive ending
QUANTMOD = 47 # Quantifier phrase modifier
RCMOD = 48 # Relative clause modifier
RCMODREL = 49 # Complementizer in relative clause
RDROP = 50 # Ellipsis without a preceding predicate
REF = 51 # Referent
REMNANT = 52 # Remnant
REPARANDUM = 53 # Reparandum
ROOT = 54 # Root
SNUM = 55 # Suffix specifying a unit of number
SUFF = 56 # Suffix
TMOD = 57 # Temporal modifier
TOPIC = 58 # Topic marker
VMOD = 59 # Verbal modifier
VOCATIVE = 60 # Vocative
XCOMP = 61 # Open clausal complement
SUFFIX = 62 # Suffix
TITLE = 63 # Title
ADVPHMOD = 64 # Adverbial phrase modifier
AUXCAUS = 65 # Causative auxiliary
AUXVV = 66 # Helper auxiliary
DTMOD = 67 # Rentaishi (Prenominal modifier)
FOREIGN = 68 # Foreign words
KW = 69 # Keyword
LIST = 70 # List for chains of comparable items
NOMC = 71 # Nominalized clause
NOMCSUBJ = 72 # Nominalized clausal subject
NOMCSUBJPASS = 73 # Nominalized clausal passive
NUMC = 74 # Compound of numeric modifier
COP = 75 # Copula
DISLOCATED = 76 # Dislocated relation
ASP = 77 # Aspect marker
GMOD = 78 # Genitive modifier
GOBJ = 79 # Genitive object
INFMOD = 80 # Infinitival modifier
MES = 81 # Measure
NCOMP = 82 # Nominal complement of a noun
head_token_index: int # Index of the head token
label: Label # Dependency relationship labeldef analyze_pos_distribution(client, text):
"""Analyze the distribution of parts of speech in text."""
document = language_v1.Document(
content=text,
type_=language_v1.Document.Type.PLAIN_TEXT
)
response = client.analyze_syntax(
request={"document": document}
)
pos_counts = {}
total_tokens = len(response.tokens)
for token in response.tokens:
pos_tag = token.part_of_speech.tag.name
pos_counts[pos_tag] = pos_counts.get(pos_tag, 0) + 1
print("Part-of-Speech Distribution:")
for pos, count in sorted(pos_counts.items(), key=lambda x: x[1], reverse=True):
percentage = (count / total_tokens) * 100
print(f"{pos}: {count} ({percentage:.1f}%)")
return pos_counts
# Usage
text = "The quick brown fox jumps gracefully over the very lazy dog near the old oak tree."
pos_distribution = analyze_pos_distribution(client, text)def visualize_dependency_tree(client, text):
"""Create a simple text representation of the dependency tree."""
document = language_v1.Document(
content=text,
type_=language_v1.Document.Type.PLAIN_TEXT
)
response = client.analyze_syntax(
request={"document": document}
)
# Find the root token
root_index = None
for i, token in enumerate(response.tokens):
if token.dependency_edge.label == language_v1.DependencyEdge.Label.ROOT:
root_index = i
break
if root_index is not None:
print(f"Dependency Tree (root: '{response.tokens[root_index].text.content}'):")
print_dependency_subtree(response.tokens, root_index, 0)
return response.tokens
def print_dependency_subtree(tokens, head_index, depth):
"""Recursively print dependency subtree."""
head_token = tokens[head_index]
indent = " " * depth
pos_tag = head_token.part_of_speech.tag.name
print(f"{indent}{head_token.text.content} ({pos_tag})")
# Find children
children = []
for i, token in enumerate(tokens):
if token.dependency_edge.head_token_index == head_index and i != head_index:
children.append((i, token.dependency_edge.label.name))
# Sort children by position in sentence
children.sort(key=lambda x: tokens[x[0]].text.begin_offset)
for child_index, relation in children:
child_indent = " " * (depth + 1)
print(f"{child_indent}--{relation}-->")
print_dependency_subtree(tokens, child_index, depth + 2)
# Usage
text = "The cat sat on the mat."
visualize_dependency_tree(client, text)def extract_lemmas(client, text):
"""Extract lemmatized forms of words."""
document = language_v1.Document(
content=text,
type_=language_v1.Document.Type.PLAIN_TEXT
)
response = client.analyze_syntax(
request={"document": document}
)
lemmas = []
print("Word -> Lemma:")
for token in response.tokens:
word = token.text.content
lemma = token.lemma
pos = token.part_of_speech.tag.name
if word != lemma:
print(f"{word} -> {lemma} ({pos})")
lemmas.append(lemma)
return lemmas
# Usage
text = "The children were running quickly through the trees and jumped over the fallen logs."
lemmas = extract_lemmas(client, text)
print(f"\nLemmatized text: {' '.join(lemmas)}")def extract_svo_triples(client, text):
"""Extract Subject-Verb-Object triples from text."""
document = language_v1.Document(
content=text,
type_=language_v1.Document.Type.PLAIN_TEXT
)
response = client.analyze_syntax(
request={"document": document}
)
triples = []
# Find verbs
for i, token in enumerate(response.tokens):
if token.part_of_speech.tag == language_v1.PartOfSpeech.Tag.VERB:
verb = token.text.content
subject = None
obj = None
# Find subject and object
for j, dependent in enumerate(response.tokens):
if dependent.dependency_edge.head_token_index == i:
if dependent.dependency_edge.label == language_v1.DependencyEdge.Label.NSUBJ:
subject = dependent.text.content
elif dependent.dependency_edge.label == language_v1.DependencyEdge.Label.DOBJ:
obj = dependent.text.content
if subject and obj:
triples.append((subject, verb, obj))
return triples
# Usage
text = "The dog chased the cat. Mary loves books. John ate an apple."
svo_triples = extract_svo_triples(client, text)
print("Subject-Verb-Object triples:")
for subject, verb, obj in svo_triples:
print(f"{subject} -> {verb} -> {obj}")def analyze_morphology(client, text):
"""Analyze morphological features of words."""
document = language_v1.Document(
content=text,
type_=language_v1.Document.Type.PLAIN_TEXT
)
response = client.analyze_syntax(
request={"document": document}
)
print("Morphological Analysis:")
for token in response.tokens:
word = token.text.content
pos_info = token.part_of_speech
features = []
# Collect non-unknown morphological features
if pos_info.aspect != language_v1.PartOfSpeech.Aspect.ASPECT_UNKNOWN:
features.append(f"Aspect: {pos_info.aspect.name}")
if pos_info.case != language_v1.PartOfSpeech.Case.CASE_UNKNOWN:
features.append(f"Case: {pos_info.case.name}")
if pos_info.gender != language_v1.PartOfSpeech.Gender.GENDER_UNKNOWN:
features.append(f"Gender: {pos_info.gender.name}")
if pos_info.mood != language_v1.PartOfSpeech.Mood.MOOD_UNKNOWN:
features.append(f"Mood: {pos_info.mood.name}")
if pos_info.number != language_v1.PartOfSpeech.Number.NUMBER_UNKNOWN:
features.append(f"Number: {pos_info.number.name}")
if pos_info.person != language_v1.PartOfSpeech.Person.PERSON_UNKNOWN:
features.append(f"Person: {pos_info.person.name}")
if pos_info.tense != language_v1.PartOfSpeech.Tense.TENSE_UNKNOWN:
features.append(f"Tense: {pos_info.tense.name}")
if pos_info.voice != language_v1.PartOfSpeech.Voice.VOICE_UNKNOWN:
features.append(f"Voice: {pos_info.voice.name}")
if features:
print(f"{word} ({pos_info.tag.name}): {', '.join(features)}")
else:
print(f"{word} ({pos_info.tag.name})")
# Usage
text = "The cats were sleeping peacefully in their beds."
analyze_morphology(client, text)def analyze_sentence_complexity(client, text):
"""Analyze grammatical complexity of sentences."""
document = language_v1.Document(
content=text,
type_=language_v1.Document.Type.PLAIN_TEXT
)
response = client.analyze_syntax(
request={"document": document}
)
sentence_stats = []
for sentence in response.sentences:
# Find tokens in this sentence
sentence_tokens = [
token for token in response.tokens
if (token.text.begin_offset >= sentence.text.begin_offset and
token.text.begin_offset < sentence.text.begin_offset + len(sentence.text.content))
]
# Count different types of dependencies
clause_count = 0
modifier_count = 0
for token in sentence_tokens:
label = token.dependency_edge.label
if label in [language_v1.DependencyEdge.Label.CCOMP,
language_v1.DependencyEdge.Label.ADVCL,
language_v1.DependencyEdge.Label.RCMOD]:
clause_count += 1
elif label in [language_v1.DependencyEdge.Label.AMOD,
language_v1.DependencyEdge.Label.ADVMOD,
language_v1.DependencyEdge.Label.PREP]:
modifier_count += 1
stats = {
'sentence': sentence.text.content,
'token_count': len(sentence_tokens),
'clause_count': clause_count,
'modifier_count': modifier_count,
'complexity_score': len(sentence_tokens) + clause_count * 2 + modifier_count
}
sentence_stats.append(stats)
return sentence_stats
# Usage
text = """
The cat sat.
The big fluffy cat that we adopted last year sat quietly on the comfortable wooden chair
that my grandmother gave me when I moved into my first apartment.
"""
complexity_stats = analyze_sentence_complexity(client, text)
print("Sentence Complexity Analysis:")
for i, stats in enumerate(complexity_stats, 1):
print(f"Sentence {i}: {stats['sentence'][:50]}...")
print(f" Tokens: {stats['token_count']}")
print(f" Clauses: {stats['clause_count']}")
print(f" Modifiers: {stats['modifier_count']}")
print(f" Complexity Score: {stats['complexity_score']}")
print()from google.api_core import exceptions
try:
response = client.analyze_syntax(
request={"document": document},
timeout=25.0
)
except exceptions.InvalidArgument as e:
print(f"Invalid request: {e}")
except exceptions.DeadlineExceeded:
print("Request timed out")
except exceptions.FailedPrecondition as e:
print(f"API version error: {e}")
print("Note: Syntax analysis requires v1 or v1beta2")
except exceptions.GoogleAPIError as e:
print(f"API error: {e}")Install with Tessl CLI
npx tessl i tessl/pypi-google-cloud-language