# ai-robot-core/ai-service/app/api/admin/dashboard.py
"""
Dashboard statistics endpoints.
[AC-AISVC-91, AC-AISVC-92] Provides overview statistics for the admin dashboard.
Enhanced with monitoring metrics for intent rules, templates, flows, and guardrails.
"""
import logging
from datetime import datetime, timedelta
from typing import Annotated, Any
from fastapi import APIRouter, Depends, Query
from fastapi.responses import JSONResponse
from sqlalchemy import desc, func, select
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.database import get_session
from app.core.exceptions import MissingTenantIdException
from app.core.tenant import get_tenant_id
from app.models import ErrorResponse
from app.models.entities import ChatMessage, ChatSession, Document, KnowledgeBase
from app.services.monitoring.cache import get_monitoring_cache
from app.services.monitoring.dashboard_service import DashboardService
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/admin/dashboard", tags=["Dashboard"])
LATENCY_THRESHOLD_MS = 5000
def get_current_tenant_id() -> str:
    """FastAPI dependency that resolves the current tenant ID.

    Returns:
        The tenant ID from the request context.

    Raises:
        MissingTenantIdException: If no tenant ID is set in the context.
    """
    resolved = get_tenant_id()
    if resolved:
        return resolved
    raise MissingTenantIdException()
def parse_date_param(date_str: str | None) -> datetime | None:
"""Parse ISO 8601 date string to datetime."""
if not date_str:
return None
try:
return datetime.fromisoformat(date_str.replace("Z", "+00:00"))
except ValueError:
return None
def _apply_date_range(
    stmt: Any,
    column: Any,
    start_dt: datetime | None,
    end_dt: datetime | None,
) -> Any:
    """Append inclusive created-at bounds to a SELECT when bounds are set.

    Args:
        stmt: A SQLAlchemy Select to extend.
        column: The datetime column to compare against.
        start_dt: Lower bound (inclusive), or None for no lower bound.
        end_dt: Upper bound (inclusive), or None for no upper bound.

    Returns:
        The (possibly extended) statement.
    """
    if start_dt is not None:
        stmt = stmt.where(column >= start_dt)
    if end_dt is not None:
        stmt = stmt.where(column <= end_dt)
    return stmt


@router.get(
    "/stats",
    operation_id="getDashboardStats",
    summary="Get dashboard statistics",
    description="[AC-AISVC-91, AC-AISVC-92] Get overview statistics for the admin dashboard with enhanced monitoring metrics.",
    responses={
        200: {"description": "Dashboard statistics"},
        401: {"description": "Unauthorized", "model": ErrorResponse},
        403: {"description": "Forbidden", "model": ErrorResponse},
    },
)
async def get_dashboard_stats(
    tenant_id: Annotated[str, Depends(get_current_tenant_id)],
    session: Annotated[AsyncSession, Depends(get_session)],
    latency_threshold: int = Query(default=LATENCY_THRESHOLD_MS, description="Latency threshold in ms"),
    start_date: str | None = Query(default=None, description="Start date filter (ISO 8601)"),
    end_date: str | None = Query(default=None, description="End date filter (ISO 8601)"),
    include_enhanced: bool = Query(default=True, description="Include enhanced monitoring stats"),
) -> JSONResponse:
    """
    [AC-AISVC-91, AC-AISVC-92] Get dashboard statistics including:

    - Basic stats: knowledge bases, messages, documents, sessions
    - Token statistics (total / prompt / completion)
    - Latency statistics (avg, p95, p99, min, max, slow/error counts)
    - Enhanced stats (v0.7.0): intent rules, templates, flows, guardrails

    Knowledge-base and document totals are tenant-wide; message, session,
    token and latency stats honor the optional start/end date filters.
    Unparseable date strings are silently ignored (treated as no filter).
    """
    # Lazy %-style args avoid formatting cost when INFO logging is disabled.
    logger.info(
        "Getting dashboard stats: tenant=%s, start=%s, end=%s",
        tenant_id, start_date, end_date,
    )
    start_dt = parse_date_param(start_date)
    end_dt = parse_date_param(end_date)

    async def scalar(stmt: Any) -> Any:
        """Execute *stmt* on the request session and return its scalar result."""
        return (await session.execute(stmt)).scalar()

    def msg_stmt(expr: Any, *filters: Any) -> Any:
        """Build a date-bounded ChatMessage aggregate SELECT for this tenant."""
        stmt = select(expr).select_from(ChatMessage).where(
            ChatMessage.tenant_id == tenant_id, *filters
        )
        return _apply_date_range(stmt, ChatMessage.created_at, start_dt, end_dt)

    # Reusable filter expressions for assistant-only aggregates.
    is_assistant = ChatMessage.role == "assistant"
    has_latency = ChatMessage.latency_ms.isnot(None)

    # Tenant-wide totals (intentionally not date-filtered, as before).
    kb_count = await scalar(
        select(func.count()).select_from(KnowledgeBase).where(
            KnowledgeBase.tenant_id == tenant_id
        )
    ) or 0
    doc_count = await scalar(
        select(func.count()).select_from(Document).where(
            Document.tenant_id == tenant_id
        )
    ) or 0

    # Date-filtered message and session counts.
    msg_count = await scalar(msg_stmt(func.count())) or 0
    session_count = await scalar(
        _apply_date_range(
            select(func.count()).select_from(ChatSession).where(
                ChatSession.tenant_id == tenant_id
            ),
            ChatSession.created_at, start_dt, end_dt,
        )
    ) or 0

    # Token usage sums; COALESCE keeps an empty result at 0 rather than NULL.
    total_tokens = await scalar(
        msg_stmt(func.coalesce(func.sum(ChatMessage.total_tokens), 0))
    ) or 0
    prompt_tokens = await scalar(
        msg_stmt(func.coalesce(func.sum(ChatMessage.prompt_tokens), 0))
    ) or 0
    completion_tokens = await scalar(
        msg_stmt(func.coalesce(func.sum(ChatMessage.completion_tokens), 0))
    ) or 0

    # Assistant request counts.
    ai_requests_count = await scalar(msg_stmt(func.count(), is_assistant)) or 0
    slow_requests_count = await scalar(
        msg_stmt(
            func.count(),
            is_assistant,
            has_latency,
            ChatMessage.latency_ms >= latency_threshold,
        )
    ) or 0
    # .is_(True) instead of "== True": renders as an identity test and
    # satisfies lint rule E712; result set is unchanged.
    error_requests_count = await scalar(
        msg_stmt(func.count(), is_assistant, ChatMessage.is_error.is_(True))
    ) or 0

    # Latency aggregates over assistant messages that recorded a latency.
    avg_latency_ms = float(
        await scalar(
            msg_stmt(
                func.coalesce(func.avg(ChatMessage.latency_ms), 0),
                is_assistant, has_latency,
            )
        ) or 0
    )
    # NOTE(review): percentile_cont(...).within_group(...) is an ordered-set
    # aggregate (PostgreSQL); it will not run on backends lacking WITHIN GROUP.
    p95_latency_ms = float(
        await scalar(
            msg_stmt(
                func.coalesce(
                    func.percentile_cont(0.95).within_group(ChatMessage.latency_ms), 0
                ),
                is_assistant, has_latency,
            )
        ) or 0
    )
    p99_latency_ms = float(
        await scalar(
            msg_stmt(
                func.coalesce(
                    func.percentile_cont(0.99).within_group(ChatMessage.latency_ms), 0
                ),
                is_assistant, has_latency,
            )
        ) or 0
    )
    min_latency_ms = float(
        await scalar(
            msg_stmt(
                func.coalesce(func.min(ChatMessage.latency_ms), 0),
                is_assistant, has_latency,
            )
        ) or 0
    )
    max_latency_ms = float(
        await scalar(
            msg_stmt(
                func.coalesce(func.max(ChatMessage.latency_ms), 0),
                is_assistant, has_latency,
            )
        ) or 0
    )

    # Most recent assistant request. Deliberately NOT date-filtered,
    # preserving original behavior: "last" means last overall.
    last_request_stmt = (
        select(ChatMessage.latency_ms, ChatMessage.created_at)
        .where(ChatMessage.tenant_id == tenant_id, is_assistant)
        .order_by(desc(ChatMessage.created_at))
        .limit(1)
    )
    last_row = (await session.execute(last_request_stmt)).fetchone()
    last_latency_ms = last_row[0] if last_row else None
    last_request_time = (
        last_row[1].isoformat() if last_row and last_row[1] else None
    )

    response_data: dict[str, Any] = {
        "knowledgeBases": kb_count,
        "totalMessages": msg_count,
        "totalDocuments": doc_count,
        "totalSessions": session_count,
        "totalTokens": total_tokens,
        "promptTokens": prompt_tokens,
        "completionTokens": completion_tokens,
        "aiRequestsCount": ai_requests_count,
        "avgLatencyMs": round(avg_latency_ms, 2),
        "lastLatencyMs": last_latency_ms,
        "lastRequestTime": last_request_time,
        "slowRequestsCount": slow_requests_count,
        "errorRequestsCount": error_requests_count,
        "p95LatencyMs": round(p95_latency_ms, 2),
        "p99LatencyMs": round(p99_latency_ms, 2),
        "minLatencyMs": round(min_latency_ms, 2),
        "maxLatencyMs": round(max_latency_ms, 2),
        "latencyThresholdMs": latency_threshold,
    }

    if include_enhanced:
        try:
            cache = get_monitoring_cache()
            dashboard_service = DashboardService(session, cache)
            enhanced_stats = await dashboard_service.get_enhanced_stats(
                tenant_id=tenant_id,
                start_date=start_dt,
                end_date=end_dt,
            )
            response_data.update(enhanced_stats.to_dict())
        except Exception as e:
            # Best-effort: the dashboard must not 500 because enhanced
            # monitoring stats are unavailable; fall back to zeroed fields.
            logger.warning("Failed to get enhanced stats: %s", e)
            response_data.update({
                "intentRuleHitRate": 0.0,
                "intentRuleHitCount": 0,
                "topIntentRules": [],
                "promptTemplateUsageCount": 0,
                "topPromptTemplates": [],
                "scriptFlowActivationCount": 0,
                "scriptFlowCompletionRate": 0.0,
                "topScriptFlows": [],
                "guardrailBlockCount": 0,
                "guardrailBlockRate": 0.0,
                "topGuardrailWords": [],
            })
    return JSONResponse(content=response_data)