Production-grade platform engineering handbook — Kubernetes, Terraform, Flux CD, GitHub Actions, AWS, and more.
67
84%
Does it follow best practices?
Impact
—
No eval scenarios have been run
Passed
No known issues
"""
LLM Observability — Python instrumentation example.
Requires: ddtrace >= 2.10.0
Install: pip install ddtrace openai
Environment variables (set before running):
DD_LLMOBS_ML_APP=orders-assistant
DD_LLMOBS_AGENTLESS_ENABLED=true
DD_API_KEY=<your-api-key>
DD_SITE=datadoghq.eu
DD_ENV=production
DD_SERVICE=orders-assistant
DD_VERSION=1.0.0
"""
import os
import openai
from ddtrace.llmobs import LLMObs
from ddtrace.llmobs.decorators import llm, workflow, retrieval
LLMObs.enable(
ml_app=os.environ["DD_LLMOBS_ML_APP"],
agentless_enabled=os.environ.get("DD_LLMOBS_AGENTLESS_ENABLED", "true").lower() == "true",
api_key=os.environ["DD_API_KEY"],
site=os.environ.get("DD_SITE", "datadoghq.eu"),
)
openai_client = openai.OpenAI()
@retrieval(name="fetch_order_context")
def fetch_order_context(order_id: str) -> list[dict]:
"""Simulates a vector DB retrieval step."""
docs = [
{"id": "doc-1", "score": 0.92, "text": f"Order {order_id}: 3x Widget, shipped 2026-05-19"},
{"id": "doc-2", "score": 0.87, "text": "Standard shipping: 3-5 business days"},
]
LLMObs.annotate(
input_data=order_id,
output_data=[{"id": d["id"], "score": d["score"], "text": d["text"]} for d in docs],
)
return docs
@llm(model_provider="openai", model_name="gpt-4o", name="generate_order_summary")
def generate_order_summary(order_id: str, context: list[dict]) -> str:
"""Calls OpenAI and traces the LLM span."""
context_text = "\n".join(d["text"] for d in context)
prompt = f"Using only the context below, summarise order {order_id} in one sentence.\n\nContext:\n{context_text}"
response = openai_client.chat.completions.create(
model="gpt-4o",
messages=[{"role": "user", "content": prompt}],
)
answer = response.choices[0].message.content
LLMObs.annotate(
input_data=[{"role": "user", "content": prompt}],
output_data=[{"role": "assistant", "content": answer}],
metrics={
"input_tokens": response.usage.prompt_tokens,
"output_tokens": response.usage.completion_tokens,
"total_tokens": response.usage.total_tokens,
},
)
span_ctx = LLMObs.export_span()
LLMObs.submit_evaluation(
span=span_ctx,
label="faithfulness",
metric_type="score",
value=1.0, # Replace with a real evaluator function in production
)
return answer
@workflow(name="order_summary_workflow")
def order_summary_workflow(order_id: str) -> dict:
"""End-to-end workflow: retrieval → LLM → return."""
context = fetch_order_context(order_id)
summary = generate_order_summary(order_id, context)
return {"order_id": order_id, "summary": summary}
if __name__ == "__main__":
result = order_summary_workflow("ORD-12345")
print(result["summary"]).claude-plugin
.github
commands
docs
examples
agent-self-improve
argocd
awesome-docs
aws
cloudfront
functions
lambda-edge
functions
azure
compliance
conventional-commits
datadog
llm-observability
demo
documentation
dora
dynatrace
fluxcd
github-actions
composite-actions
configure-cloud
db-migrate
docker-build-push
k8s-deploy
notify-slack
pr-comment
release-tag
security-scan
setup-env
setup-terraform
terraform-plan
helm
web-service
templates
kubernetes
kyverno
mcp
observability
openshift
pr-review
ownership
runtime-security
supply-chain
terraform
references
scripts
skills
platform-skills
tests