Агенты недетерминированы — нужно видеть, что они делают. Для этого нужны трассировка каждого шага, логирование tool-calls и мониторинг ошибок.
# The three pillars of observability for AI agents:
#
# 1. Traces — the path of every request through the system
#    LLM call → tool selection → tool execution → next LLM call → response
#
# 2. Metrics — aggregated indicators
#    latency, error_rate, token_usage, tool_success_rate
#
# 3. Logs — detailed events
#    every tool call, error, retry and agent decision


class AgentObservability:
    """Minimal container for one agent's observability data."""

    def __init__(self, agent_name):
        """Create empty trace/metric/error stores for the agent ``agent_name``."""
        # Collected spans, in the order they are appended.
        self.traces: list = list()
        # Aggregated metric values.
        self.metrics: dict = dict()
        # Name of the agent this instance belongs to.
        self.name = agent_name
        # Error log entries.
        self.errors: list = list()
# Install the dependency first (shell command, not Python):
#   pip install langsmith
import os

from langsmith import traceable

# LangSmith configuration.
# setdefault is used for the API key so that a real key already exported in
# the environment is not overwritten by the "ls_..." placeholder.
os.environ.setdefault("LANGSMITH_API_KEY", "ls_...")
os.environ["LANGSMITH_TRACING"] = "true"
os.environ["LANGSMITH_PROJECT"] = "my-agent"


@traceable(name="agent_think")
def agent_think(query, context):
    """Step: the agent decides which tool to call.

    Builds a prompt from ``query`` and ``context`` and returns whatever
    ``llm.invoke`` returns. ``llm`` is not defined in this snippet and must
    be provided by the surrounding module.
    """
    prompt = f"Query: {query}\nContext: {context}\nChoose tool:"
    response = llm.invoke(prompt)
    return response


@traceable(name="tool_execute")
def tool_execute(tool_name, args):
    """Step: run the selected tool.

    Looks ``tool_name`` up in ``TOOL_REGISTRY`` (defined elsewhere) and calls
    the tool with ``args`` unpacked as keyword arguments.

    Raises:
        KeyError: if ``tool_name`` is not a key of ``TOOL_REGISTRY``.
    """
    tool = TOOL_REGISTRY[tool_name]
    result = tool(**args)
    return result


@traceable(name="agent_run", run_type="chain")
def agent_run(user_query):
    """Full agent run — the root span.

    Loads context via ``load_context`` (defined elsewhere), asks
    ``agent_think`` for a decision and executes it with ``tool_execute``.
    The decision object is expected to expose ``.tool`` and ``.args``.
    """
    context = load_context()
    decision = agent_think(user_query, context)
    result = tool_execute(decision.tool, decision.args)
    return result
import json
import time
import uuid
from functools import wraps
from datetime import datetime


def log_tool_call(func):
    """Decorator: intercept a tool call and emit one structured log line.

    The wrapped function's return value and exceptions pass through
    unchanged. After every call a JSON object is printed to stdout with the
    keys ``id``, ``tool``, ``args``, ``kwargs``, ``timestamp``, ``status``,
    ``latency`` and, on failure, ``error``.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        # A millisecond timestamp alone collides for calls made within the
        # same millisecond; the random suffix keeps ids practically unique
        # while preserving the "call_<ms>" prefix.
        call_id = f"call_{int(time.time() * 1000)}_{uuid.uuid4().hex[:8]}"
        log_entry = {
            "id": call_id,
            "tool": func.__name__,
            "args": str(args),
            "kwargs": str(kwargs),
            "timestamp": datetime.now().isoformat(),
        }
        # perf_counter is monotonic, so the measured latency cannot be
        # skewed (or go negative) when the system clock is adjusted.
        start = time.perf_counter()
        try:
            result = func(*args, **kwargs)
        except Exception as e:
            log_entry.update({"status": "error", "error": str(e)})
            raise
        else:
            log_entry["status"] = "success"
            return result
        finally:
            # Runs on both paths, so latency is computed in one place.
            log_entry["latency"] = time.perf_counter() - start
            print(json.dumps(log_entry))  # structured log → stdout
    return wrapper


@log_tool_call
def search_web(query):
    """Example tool: return a placeholder result string for ``query``."""
    return f"Results for: {query}"
# Install the dependency first (shell command, not Python):
#   pip install prometheus-client
from prometheus_client import (
    CONTENT_TYPE_LATEST,
    Counter,
    Histogram,
    Gauge,
    generate_latest,
)
from fastapi import FastAPI, Response

# Prometheus metrics
agent_requests = Counter(
    'agent_requests_total', 'Total agent requests', ['agent', 'status'])
agent_latency = Histogram(
    'agent_latency_seconds', 'Request latency', ['agent', 'step'],
    buckets=[.1, .5, 1, 2, 5, 10, 30])
agent_errors = Counter(
    'agent_errors_total', 'Total errors', ['agent', 'error_type'])
token_usage = Counter(
    'agent_tokens_total', 'Token usage', ['agent', 'type'])
active_sessions = Gauge(
    'agent_active_sessions', 'Active sessions', ['agent'])

app = FastAPI()


@app.get("/metrics")
def metrics():
    """Expose the registered metrics for Prometheus to scrape.

    The payload is wrapped in an explicit ``Response`` with the Prometheus
    content type. Returning the raw bytes would let FastAPI serialise them
    as a JSON response, which is not the text exposition format a
    Prometheus scraper expects.
    """
    return Response(content=generate_latest(), media_type=CONTENT_TYPE_LATEST)
# Grafana dashboard JSON — key panels
# Copy this JSON into Grafana → Import dashboard
# (paste only the JSON object below; JSON itself has no comment syntax)
#
# Notes on the queries:
# - rate() yields a per-second value, so it is multiplied by 60 to match the
#   "Requests per minute" title.
# - agent_errors_total and agent_requests_total carry different labels
#   (error_type vs status), so both sides are summed by `agent` before the
#   division; otherwise the label sets never match and the panel is empty.
{
  "title": "AI Agent Dashboard",
  "panels": [
    {
      "title": "Requests per minute",
      "targets": [{"expr": "rate(agent_requests_total[5m]) * 60"}]
    },
    {
      "title": "P95 Latency",
      "targets": [{"expr": "histogram_quantile(0.95, rate(agent_latency_seconds_bucket[5m]))"}]
    },
    {
      "title": "Error Rate",
      "targets": [{"expr": "sum by (agent) (rate(agent_errors_total[5m])) / sum by (agent) (rate(agent_requests_total[5m]))"}]
    },
    {
      "title": "Token Usage (input vs output)",
      "targets": [{"expr": "rate(agent_tokens_total[5m])"}]
    }
  ]
}
# prometheus-alert-rules.yml — alerting rules evaluated by Prometheus;
# alerts that fire are then routed by Alertmanager.
groups:
  - name: agent_alerts
    rules:
      - alert: HighErrorRate
        # Errors and requests carry different labels (error_type vs status),
        # so both sides are aggregated by `agent` to make the division match.
        expr: sum by (agent) (rate(agent_errors_total[5m])) / sum by (agent) (rate(agent_requests_total[5m])) > 0.05
        for: 2m
        labels:
          severity: critical
        annotations:
          summary: "Error rate > 5% for agent {{ $labels.agent }}"

      - alert: HighLatency
        expr: histogram_quantile(0.95, rate(agent_latency_seconds_bucket[5m])) > 10
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "P95 latency > 10s for agent {{ $labels.agent }}"

      - alert: HighTokenUsage
        # increase() gives tokens consumed over the 1h window; rate() would be
        # tokens per second and would not match the tokens/hour threshold.
        expr: increase(agent_tokens_total[1h]) > 100000
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: "Token usage spiking: {{ $value }} tokens/hour"