From 6b21ba8351a152ac9471601d1bf0013c4996a219 Mon Sep 17 00:00:00 2001 From: MerCry Date: Sat, 28 Feb 2026 12:52:50 +0800 Subject: [PATCH] =?UTF-8?q?feat(v0.7.0):=20=E9=AA=8C=E6=94=B6=E9=80=9A?= =?UTF-8?q?=E8=BF=87=20-=20Dashboard=E7=BB=9F=E8=AE=A1=E5=A2=9E=E5=BC=BA?= =?UTF-8?q?=E3=80=81=E6=B5=81=E7=A8=8B=E6=B5=8B=E8=AF=95=E3=80=81=E5=AF=B9?= =?UTF-8?q?=E8=AF=9D=E8=BF=BD=E8=B8=AA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 验收通过的标准: - AC-ASA-59~64: 前端话术流程和护栏监控功能验收 - AC-AISVC-91~95: Dashboard统计增强和完整流程测试验收 - AC-AISVC-108~110: 对话追踪和导出功能验收 修复问题: - flow_test.py: 修复OrchestratorService导入和调用 - 前后端字段不一致: order → step_no, wait_for_input → wait_input - 数据库迁移: 添加chat_messages缺失的监控字段 新增文件: - ai-service/app/api/admin/flow_test.py - ai-service/scripts/migrations/add_chat_message_fields.py - ai-service-admin/src/views/admin/prompt-template/components/VariableManager.vue --- ...7.0-window3-dashboard-tracking-progress.md | 19 + .gitignore | 2 + agents.md | 7 +- ai-service-admin/src/types/prompt-template.ts | 6 +- ai-service-admin/src/types/script-flow.ts | 4 +- .../components/PreviewDialog.vue | 29 +- .../components/TemplateDetail.vue | 6 +- .../components/VariableManager.vue | 283 +++ .../src/views/admin/prompt-template/index.vue | 20 +- .../script-flow/components/FlowPreview.vue | 4 +- .../src/views/admin/script-flow/index.vue | 14 +- ai-service/app/api/admin/api_key.py | 20 +- ai-service/app/api/admin/dashboard.py | 158 +- ai-service/app/api/admin/embedding.py | 32 +- ai-service/app/api/admin/flow_test.py | 402 +++ ai-service/app/api/admin/kb.py | 429 +++- ai-service/app/api/admin/kb_optimized.py | 58 +- ai-service/app/api/admin/llm.py | 1 - ai-service/app/api/admin/monitoring.py | 23 + ai-service/app/api/admin/rag.py | 23 +- ai-service/app/api/admin/sessions.py | 7 +- ai-service/app/api/chat.py | 16 +- ai-service/app/core/__init__.py | 2 +- ai-service/app/core/config.py | 7 +- ai-service/app/core/database.py | 4 +-
ai-service/app/core/exceptions.py | 3 + ai-service/app/core/middleware.py | 25 +- ai-service/app/core/prompts.py | 32 +- ai-service/app/core/qdrant_client.py | 274 ++- ai-service/app/core/sse.py | 7 +- ai-service/app/models/entities.py | 154 +- ai-service/app/services/api_key.py | 86 +- ai-service/app/services/context.py | 2 +- ai-service/app/services/document/base.py | 4 +- .../app/services/document/excel_parser.py | 66 +- ai-service/app/services/document/factory.py | 42 +- .../app/services/document/pdf_parser.py | 50 +- .../app/services/document/text_parser.py | 18 +- .../app/services/document/word_parser.py | 28 +- ai-service/app/services/embedding/__init__.py | 6 +- ai-service/app/services/embedding/base.py | 2 +- ai-service/app/services/embedding/factory.py | 72 +- .../app/services/embedding/nomic_provider.py | 31 +- .../services/embedding/ollama_embedding.py | 10 +- .../app/services/embedding/ollama_provider.py | 11 +- .../app/services/embedding/openai_provider.py | 18 +- ai-service/app/services/flow/__init__.py | 2 +- ai-service/app/services/flow/engine.py | 40 +- ai-service/app/services/flow/flow_service.py | 26 +- ai-service/app/services/intent/__init__.py | 2 +- ai-service/app/services/intent/router.py | 10 +- .../app/services/intent/rule_service.py | 5 +- ai-service/app/services/kb.py | 4 +- ai-service/app/services/llm/base.py | 3 +- ai-service/app/services/llm/factory.py | 16 +- ai-service/app/services/llm/openai_client.py | 15 +- ai-service/app/services/memory.py | 4 +- .../app/services/monitoring/prompt_monitor.py | 16 +- ai-service/app/services/orchestrator.py | 560 ++++- .../app/services/prompt/template_service.py | 45 +- .../app/services/prompt/variable_resolver.py | 32 +- ai-service/app/services/retrieval/__init__.py | 23 +- ai-service/app/services/retrieval/indexer.py | 109 +- ai-service/app/services/retrieval/metadata.py | 26 +- .../services/retrieval/optimized_retriever.py | 175 +- .../services/retrieval/vector_retriever.py | 7 +- 
.../progress/phase11_multi_kb_progress.md | 178 ++ ..._97d97b90-a146-4141-9194-f24d7efcae0e.json | 1 + ai-service/pyproject.toml | 1 + .../migrations/002_add_monitoring_fields.sql | 74 + .../migrations/add_chat_message_fields.py | 57 + docs/intent-rule-usage-appendix.md | 115 + docs/intent-rule-usage-final.txt | 38 + docs/intent-rule-usage.md | 322 +++ docs/prompt-template-analysis.md | 626 +++++ docs/script-flow-usage.md | 663 +++++ docs/session-handoff-protocol.md | 18 +- docs/spec-product-zh.md | 147 +- spec/ai-service-admin/requirements.md | 255 +- spec/ai-service-admin/tasks.md | 132 +- spec/ai-service/design.md | 1828 +++++++++++++- spec/ai-service/openapi.admin.yaml | 2180 ++++++++++++++++- spec/ai-service/requirements.md | 347 ++- spec/ai-service/tasks.md | 169 +- 84 files changed, 9933 insertions(+), 855 deletions(-) create mode 100644 ai-service-admin/src/views/admin/prompt-template/components/VariableManager.vue create mode 100644 ai-service/app/api/admin/flow_test.py create mode 100644 ai-service/docs/progress/phase11_multi_kb_progress.md create mode 100644 ai-service/exports/conversations_default@ash@2026_97d97b90-a146-4141-9194-f24d7efcae0e.json create mode 100644 ai-service/scripts/migrations/002_add_monitoring_fields.sql create mode 100644 ai-service/scripts/migrations/add_chat_message_fields.py create mode 100644 docs/intent-rule-usage-appendix.md create mode 100644 docs/intent-rule-usage-final.txt create mode 100644 docs/intent-rule-usage.md create mode 100644 docs/prompt-template-analysis.md create mode 100644 docs/script-flow-usage.md diff --git a/.claude/progress/v0.7.0-window3-dashboard-tracking-progress.md b/.claude/progress/v0.7.0-window3-dashboard-tracking-progress.md index 533b9fa..1d5d929 100644 --- a/.claude/progress/v0.7.0-window3-dashboard-tracking-progress.md +++ b/.claude/progress/v0.7.0-window3-dashboard-tracking-progress.md @@ -188,6 +188,25 @@ - 对话追踪页面 - 监控导航路由 +### 会话 3 (2026-02-28) - 验收会话 +- 完成: 前端验收标准 AC-ASA-59 ~ AC-ASA-64 验收通过 
+ - AC-ASA-59: 流程模拟对话框 - 步骤可视化 ✅ + - AC-ASA-60: 话术流程监控页面 - 流程激活统计 ✅ + - AC-ASA-61: 流程执行记录详情弹窗 - 分页支持 ✅ + - AC-ASA-62: 护栏测试对话框 - 禁词检测结果 ✅ + - AC-ASA-63: 输出护栏监控页面 - 护栏拦截统计 ✅ + - AC-ASA-64: 护栏拦截记录详情弹窗 - 分页支持 ✅ +- 完成: 后端验收标准 AC-AISVC-91 ~ AC-AISVC-95, AC-AISVC-108 ~ AC-AISVC-110 验收通过 + - AC-AISVC-91/92: Dashboard统计增强 - 四个监控统计卡片+时间筛选 ✅ + - AC-AISVC-93/94/95: 完整流程测试 - 12步执行时间线+步骤详情 ✅ + - AC-AISVC-108/109/110: 对话追踪 - 列表+详情+导出 ✅ +- 修复问题: + - flow_test.py 导入错误: Orchestrator → OrchestratorService + - flow_test.py ChatRequest 导入路径修正 + - flow_test.py ChatResponse.sources 属性不存在 + - 数据库迁移: 创建 add_chat_message_fields.py 添加缺失字段 + - 前后端字段不一致: order → step_no, wait_for_input → wait_input + ## 7. 下一步行动 **任务已全部完成** diff --git a/.gitignore b/.gitignore index 02f7c61..49497a1 100644 --- a/.gitignore +++ b/.gitignore @@ -165,3 +165,5 @@ ai-service/uploads/ ai-service/config/ *.local +/.trae/ +/.claude/ diff --git a/agents.md b/agents.md index b1b32f6..af177bd 100644 --- a/agents.md +++ b/agents.md @@ -6,10 +6,15 @@ - `spec//requirements.md`(当前模块需求) - `spec//openapi.provider.yaml`(本模块提供) - `spec//openapi.deps.yaml`(本模块依赖,如存在) +- **版本化迭代规则**(CRITICAL): + - 读取 `requirements.md` 的 frontmatter,识别 `active_version` 字段(如 `”0.6.0-0.7.0”`) + - **仅关注活跃版本的 AC**:历史版本(折叠在 `
` 中)的 AC 可跳过,不影响当前实现 + - 在代码注释、测试用例、commit message 中引用的 AC 编号,必须在活跃版本范围内 + - 若需要追加新需求,按 `docs/spec-product-zh.md` 第 4 节”版本化迭代规则”执行 - **长会话/复杂任务接续**:若当前任务满足 `docs/session-handoff-protocol.md` 中的触发条件,**必须**先读取并持续更新 `docs/progress/{module}-{feature}-progress.md`。 - 若上述任一文档缺失、冲突或内容不明确: - **禁止开始实现** - - 必须在 `spec//tasks.md` 记录“待澄清”并停止 + - 必须在 `spec//tasks.md` 记录”待澄清”并停止 ## 1. 提交与同步(Git Cadence,必须) - **提交粒度**: diff --git a/ai-service-admin/src/types/prompt-template.ts b/ai-service-admin/src/types/prompt-template.ts index 5a190dc..18eeb49 100644 --- a/ai-service-admin/src/types/prompt-template.ts +++ b/ai-service-admin/src/types/prompt-template.ts @@ -48,7 +48,8 @@ export interface PromptTemplateCreate { name: string scene: string description?: string - content: string + system_instruction: string + variables?: PromptVariable[] is_default?: boolean } @@ -56,7 +57,8 @@ export interface PromptTemplateUpdate { name?: string scene?: string description?: string - content: string + system_instruction?: string + variables?: PromptVariable[] } export interface PromptTemplateListResponse { diff --git a/ai-service-admin/src/types/script-flow.ts b/ai-service-admin/src/types/script-flow.ts index ca06b8b..b85e3b3 100644 --- a/ai-service-admin/src/types/script-flow.ts +++ b/ai-service-admin/src/types/script-flow.ts @@ -21,9 +21,9 @@ export interface ScriptFlowDetail { export interface FlowStep { step_id: string - order: number + step_no: number content: string - wait_for_input: boolean + wait_input: boolean timeout_seconds?: number timeout_action?: 'repeat' | 'skip' | 'transfer' next_conditions?: NextCondition[] diff --git a/ai-service-admin/src/views/admin/prompt-template/components/PreviewDialog.vue b/ai-service-admin/src/views/admin/prompt-template/components/PreviewDialog.vue index 119741a..e6d5952 100644 --- a/ai-service-admin/src/views/admin/prompt-template/components/PreviewDialog.vue +++ b/ai-service-admin/src/views/admin/prompt-template/components/PreviewDialog.vue @@ -14,7 
+14,7 @@

变量设置

@@ -102,9 +102,9 @@ + + diff --git a/ai-service-admin/src/views/admin/prompt-template/index.vue b/ai-service-admin/src/views/admin/prompt-template/index.vue index 6847f94..4e38aea 100644 --- a/ai-service-admin/src/views/admin/prompt-template/index.vue +++ b/ai-service-admin/src/views/admin/prompt-template/index.vue @@ -100,11 +100,11 @@ - +
({ name: '', scene: '', description: '', - content: '', + system_instruction: '', variables: [] }) const formRules = { name: [{ required: true, message: '请输入模板名称', trigger: 'blur' }], scene: [{ required: true, message: '请选择场景', trigger: 'change' }], - content: [{ required: true, message: '请输入系统指令内容', trigger: 'blur' }] + system_instruction: [{ required: true, message: '请输入系统指令内容', trigger: 'blur' }] } const getSceneLabel = (scene: string) => { @@ -269,7 +269,7 @@ const handleCreate = () => { name: '', scene: '', description: '', - content: '', + system_instruction: '', variables: [] } dialogVisible.value = true @@ -284,7 +284,7 @@ const handleEdit = async (row: PromptTemplate) => { name: detail.name, scene: detail.scene, description: detail.description || '', - content: detail.current_content || '', + system_instruction: detail.current_content || '', variables: detail.variables || [] } dialogVisible.value = true @@ -365,7 +365,7 @@ const handleSubmit = async () => { name: formData.value.name, scene: formData.value.scene, description: formData.value.description, - content: formData.value.content, + system_instruction: formData.value.system_instruction, variables: formData.value.variables } await updatePromptTemplate(currentEditId.value, updateData) @@ -388,15 +388,15 @@ const insertVariable = (varName: string) => { if (textarea) { const start = textarea.selectionStart const end = textarea.selectionEnd - const text = formData.value.content || '' + const text = formData.value.system_instruction || '' const insertText = `{{${varName}}}` - formData.value.content = text.substring(0, start) + insertText + text.substring(end) + formData.value.system_instruction = text.substring(0, start) + insertText + text.substring(end) setTimeout(() => { textarea.focus() textarea.setSelectionRange(start + insertText.length, start + insertText.length) }, 0) } else { - formData.value.content = (formData.value.content || '') + `{{${varName}}}` + formData.value.system_instruction = 
(formData.value.system_instruction || '') + `{{${varName}}}` } } diff --git a/ai-service-admin/src/views/admin/script-flow/components/FlowPreview.vue b/ai-service-admin/src/views/admin/script-flow/components/FlowPreview.vue index 9eefb95..8e76ace 100644 --- a/ai-service-admin/src/views/admin/script-flow/components/FlowPreview.vue +++ b/ai-service-admin/src/views/admin/script-flow/components/FlowPreview.vue @@ -16,12 +16,12 @@
步骤 {{ index + 1 }} - 等待输入 + 等待输入
{{ step.content }}
-
+
超时时间: {{ step.timeout_seconds }}秒 diff --git a/ai-service-admin/src/views/admin/script-flow/index.vue b/ai-service-admin/src/views/admin/script-flow/index.vue index dbe6a36..5898535 100644 --- a/ai-service-admin/src/views/admin/script-flow/index.vue +++ b/ai-service-admin/src/views/admin/script-flow/index.vue @@ -118,15 +118,15 @@ - + - + - + { const addStep = () => { formData.value.steps.push({ step_id: generateStepId(), - order: formData.value.steps.length + 1, + step_no: formData.value.steps.length + 1, content: '', - wait_for_input: true, + wait_input: true, timeout_seconds: 30, timeout_action: 'repeat', next_conditions: [] @@ -319,7 +319,7 @@ const addStep = () => { const removeStep = (index: number) => { formData.value.steps.splice(index, 1) formData.value.steps.forEach((step, i) => { - step.order = i + 1 + step.step_no = i + 1 }) } @@ -341,7 +341,7 @@ const handleSubmit = async () => { ...formData.value, steps: formData.value.steps.map((step, index) => ({ ...step, - order: index + 1 + step_no: index + 1 })) } diff --git a/ai-service/app/api/admin/api_key.py b/ai-service/app/api/admin/api_key.py index b73b78b..121dc97 100644 --- a/ai-service/app/api/admin/api_key.py +++ b/ai-service/app/api/admin/api_key.py @@ -71,7 +71,7 @@ async def list_api_keys( """ service = get_api_key_service() keys = await service.list_keys(session) - + return ApiKeyListResponse( keys=[api_key_to_response(k) for k in keys], total=len(keys), @@ -87,18 +87,18 @@ async def create_api_key( [AC-AISVC-50] Create a new API key. """ service = get_api_key_service() - + key_value = request.key or service.generate_key() - + key_create = ApiKeyCreate( key=key_value, name=request.name, is_active=True, ) - + api_key = await service.create_key(session, key_create) logger.info(f"[AC-AISVC-50] Created API key: {api_key.name}") - + return api_key_to_response(api_key) @@ -111,9 +111,9 @@ async def delete_api_key( [AC-AISVC-50] Delete an API key. 
""" service = get_api_key_service() - + deleted = await service.delete_key(session, key_id) - + if not deleted: raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, @@ -131,15 +131,15 @@ async def toggle_api_key( [AC-AISVC-50] Toggle API key active status. """ service = get_api_key_service() - + api_key = await service.toggle_key(session, key_id, request.is_active) - + if not api_key: raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, detail="API key not found", ) - + return api_key_to_response(api_key) diff --git a/ai-service/app/api/admin/dashboard.py b/ai-service/app/api/admin/dashboard.py index e9cf199..e221eec 100644 --- a/ai-service/app/api/admin/dashboard.py +++ b/ai-service/app/api/admin/dashboard.py @@ -1,14 +1,16 @@ """ Dashboard statistics endpoints. -Provides overview statistics for the admin dashboard. +[AC-AISVC-91, AC-AISVC-92] Provides overview statistics for the admin dashboard. +Enhanced with monitoring metrics for intent rules, templates, flows, and guardrails. 
""" import logging -from typing import Annotated +from datetime import datetime, timedelta +from typing import Annotated, Any from fastapi import APIRouter, Depends, Query from fastapi.responses import JSONResponse -from sqlalchemy import select, func, desc +from sqlalchemy import desc, func, select from sqlalchemy.ext.asyncio import AsyncSession from app.core.database import get_session @@ -16,6 +18,8 @@ from app.core.exceptions import MissingTenantIdException from app.core.tenant import get_tenant_id from app.models import ErrorResponse from app.models.entities import ChatMessage, ChatSession, Document, KnowledgeBase +from app.services.monitoring.cache import get_monitoring_cache +from app.services.monitoring.dashboard_service import DashboardService logger = logging.getLogger(__name__) @@ -32,11 +36,21 @@ def get_current_tenant_id() -> str: return tenant_id +def parse_date_param(date_str: str | None) -> datetime | None: + """Parse ISO 8601 date string to datetime.""" + if not date_str: + return None + try: + return datetime.fromisoformat(date_str.replace("Z", "+00:00")) + except ValueError: + return None + + @router.get( "/stats", operation_id="getDashboardStats", summary="Get dashboard statistics", - description="Get overview statistics for the admin dashboard.", + description="[AC-AISVC-91, AC-AISVC-92] Get overview statistics for the admin dashboard with enhanced monitoring metrics.", responses={ 200: {"description": "Dashboard statistics"}, 401: {"description": "Unauthorized", "model": ErrorResponse}, @@ -47,11 +61,21 @@ async def get_dashboard_stats( tenant_id: Annotated[str, Depends(get_current_tenant_id)], session: Annotated[AsyncSession, Depends(get_session)], latency_threshold: int = Query(default=LATENCY_THRESHOLD_MS, description="Latency threshold in ms"), + start_date: str | None = Query(default=None, description="Start date filter (ISO 8601)"), + end_date: str | None = Query(default=None, description="End date filter (ISO 8601)"), + 
include_enhanced: bool = Query(default=True, description="Include enhanced monitoring stats"), ) -> JSONResponse: """ - Get dashboard statistics including knowledge bases, messages, and activity. + [AC-AISVC-91, AC-AISVC-92] Get dashboard statistics including: + - Basic stats: knowledge bases, messages, documents, sessions + - Token statistics + - Latency statistics + - Enhanced stats (v0.7.0): intent rules, templates, flows, guardrails """ - logger.info(f"Getting dashboard stats: tenant={tenant_id}") + logger.info(f"Getting dashboard stats: tenant={tenant_id}, start={start_date}, end={end_date}") + + start_dt = parse_date_param(start_date) + end_dt = parse_date_param(end_date) kb_count_stmt = select(func.count()).select_from(KnowledgeBase).where( KnowledgeBase.tenant_id == tenant_id @@ -62,6 +86,10 @@ async def get_dashboard_stats( msg_count_stmt = select(func.count()).select_from(ChatMessage).where( ChatMessage.tenant_id == tenant_id ) + if start_dt: + msg_count_stmt = msg_count_stmt.where(ChatMessage.created_at >= start_dt) + if end_dt: + msg_count_stmt = msg_count_stmt.where(ChatMessage.created_at <= end_dt) msg_result = await session.execute(msg_count_stmt) msg_count = msg_result.scalar() or 0 @@ -74,24 +102,40 @@ async def get_dashboard_stats( session_count_stmt = select(func.count()).select_from(ChatSession).where( ChatSession.tenant_id == tenant_id ) + if start_dt: + session_count_stmt = session_count_stmt.where(ChatSession.created_at >= start_dt) + if end_dt: + session_count_stmt = session_count_stmt.where(ChatSession.created_at <= end_dt) session_result = await session.execute(session_count_stmt) session_count = session_result.scalar() or 0 total_tokens_stmt = select(func.coalesce(func.sum(ChatMessage.total_tokens), 0)).select_from( ChatMessage ).where(ChatMessage.tenant_id == tenant_id) + if start_dt: + total_tokens_stmt = total_tokens_stmt.where(ChatMessage.created_at >= start_dt) + if end_dt: + total_tokens_stmt = 
total_tokens_stmt.where(ChatMessage.created_at <= end_dt) total_tokens_result = await session.execute(total_tokens_stmt) total_tokens = total_tokens_result.scalar() or 0 prompt_tokens_stmt = select(func.coalesce(func.sum(ChatMessage.prompt_tokens), 0)).select_from( ChatMessage ).where(ChatMessage.tenant_id == tenant_id) + if start_dt: + prompt_tokens_stmt = prompt_tokens_stmt.where(ChatMessage.created_at >= start_dt) + if end_dt: + prompt_tokens_stmt = prompt_tokens_stmt.where(ChatMessage.created_at <= end_dt) prompt_tokens_result = await session.execute(prompt_tokens_stmt) prompt_tokens = prompt_tokens_result.scalar() or 0 completion_tokens_stmt = select(func.coalesce(func.sum(ChatMessage.completion_tokens), 0)).select_from( ChatMessage ).where(ChatMessage.tenant_id == tenant_id) + if start_dt: + completion_tokens_stmt = completion_tokens_stmt.where(ChatMessage.created_at >= start_dt) + if end_dt: + completion_tokens_stmt = completion_tokens_stmt.where(ChatMessage.created_at <= end_dt) completion_tokens_result = await session.execute(completion_tokens_stmt) completion_tokens = completion_tokens_result.scalar() or 0 @@ -99,6 +143,10 @@ async def get_dashboard_stats( ChatMessage.tenant_id == tenant_id, ChatMessage.role == "assistant" ) + if start_dt: + ai_requests_stmt = ai_requests_stmt.where(ChatMessage.created_at >= start_dt) + if end_dt: + ai_requests_stmt = ai_requests_stmt.where(ChatMessage.created_at <= end_dt) ai_requests_result = await session.execute(ai_requests_stmt) ai_requests_count = ai_requests_result.scalar() or 0 @@ -109,6 +157,10 @@ async def get_dashboard_stats( ChatMessage.role == "assistant", ChatMessage.latency_ms.isnot(None) ) + if start_dt: + avg_latency_stmt = avg_latency_stmt.where(ChatMessage.created_at >= start_dt) + if end_dt: + avg_latency_stmt = avg_latency_stmt.where(ChatMessage.created_at <= end_dt) avg_latency_result = await session.execute(avg_latency_stmt) avg_latency_ms = float(avg_latency_result.scalar() or 0) @@ -127,6 +179,10 
@@ async def get_dashboard_stats( ChatMessage.latency_ms.isnot(None), ChatMessage.latency_ms >= latency_threshold ) + if start_dt: + slow_requests_stmt = slow_requests_stmt.where(ChatMessage.created_at >= start_dt) + if end_dt: + slow_requests_stmt = slow_requests_stmt.where(ChatMessage.created_at <= end_dt) slow_requests_result = await session.execute(slow_requests_stmt) slow_requests_count = slow_requests_result.scalar() or 0 @@ -135,6 +191,10 @@ async def get_dashboard_stats( ChatMessage.role == "assistant", ChatMessage.is_error == True ) + if start_dt: + error_requests_stmt = error_requests_stmt.where(ChatMessage.created_at >= start_dt) + if end_dt: + error_requests_stmt = error_requests_stmt.where(ChatMessage.created_at <= end_dt) error_requests_result = await session.execute(error_requests_stmt) error_requests_count = error_requests_result.scalar() or 0 @@ -145,6 +205,10 @@ async def get_dashboard_stats( ChatMessage.role == "assistant", ChatMessage.latency_ms.isnot(None) ) + if start_dt: + p95_latency_stmt = p95_latency_stmt.where(ChatMessage.created_at >= start_dt) + if end_dt: + p95_latency_stmt = p95_latency_stmt.where(ChatMessage.created_at <= end_dt) p95_latency_result = await session.execute(p95_latency_stmt) p95_latency_ms = float(p95_latency_result.scalar() or 0) @@ -155,6 +219,10 @@ async def get_dashboard_stats( ChatMessage.role == "assistant", ChatMessage.latency_ms.isnot(None) ) + if start_dt: + p99_latency_stmt = p99_latency_stmt.where(ChatMessage.created_at >= start_dt) + if end_dt: + p99_latency_stmt = p99_latency_stmt.where(ChatMessage.created_at <= end_dt) p99_latency_result = await session.execute(p99_latency_stmt) p99_latency_ms = float(p99_latency_result.scalar() or 0) @@ -165,6 +233,10 @@ async def get_dashboard_stats( ChatMessage.role == "assistant", ChatMessage.latency_ms.isnot(None) ) + if start_dt: + min_latency_stmt = min_latency_stmt.where(ChatMessage.created_at >= start_dt) + if end_dt: + min_latency_stmt = 
min_latency_stmt.where(ChatMessage.created_at <= end_dt) min_latency_result = await session.execute(min_latency_stmt) min_latency_ms = float(min_latency_result.scalar() or 0) @@ -175,28 +247,58 @@ async def get_dashboard_stats( ChatMessage.role == "assistant", ChatMessage.latency_ms.isnot(None) ) + if start_dt: + max_latency_stmt = max_latency_stmt.where(ChatMessage.created_at >= start_dt) + if end_dt: + max_latency_stmt = max_latency_stmt.where(ChatMessage.created_at <= end_dt) max_latency_result = await session.execute(max_latency_stmt) max_latency_ms = float(max_latency_result.scalar() or 0) - return JSONResponse( - content={ - "knowledgeBases": kb_count, - "totalMessages": msg_count, - "totalDocuments": doc_count, - "totalSessions": session_count, - "totalTokens": total_tokens, - "promptTokens": prompt_tokens, - "completionTokens": completion_tokens, - "aiRequestsCount": ai_requests_count, - "avgLatencyMs": round(avg_latency_ms, 2), - "lastLatencyMs": last_latency_ms, - "lastRequestTime": last_request_time, - "slowRequestsCount": slow_requests_count, - "errorRequestsCount": error_requests_count, - "p95LatencyMs": round(p95_latency_ms, 2), - "p99LatencyMs": round(p99_latency_ms, 2), - "minLatencyMs": round(min_latency_ms, 2), - "maxLatencyMs": round(max_latency_ms, 2), - "latencyThresholdMs": latency_threshold, - } - ) + response_data: dict[str, Any] = { + "knowledgeBases": kb_count, + "totalMessages": msg_count, + "totalDocuments": doc_count, + "totalSessions": session_count, + "totalTokens": total_tokens, + "promptTokens": prompt_tokens, + "completionTokens": completion_tokens, + "aiRequestsCount": ai_requests_count, + "avgLatencyMs": round(avg_latency_ms, 2), + "lastLatencyMs": last_latency_ms, + "lastRequestTime": last_request_time, + "slowRequestsCount": slow_requests_count, + "errorRequestsCount": error_requests_count, + "p95LatencyMs": round(p95_latency_ms, 2), + "p99LatencyMs": round(p99_latency_ms, 2), + "minLatencyMs": round(min_latency_ms, 2), + 
"maxLatencyMs": round(max_latency_ms, 2), + "latencyThresholdMs": latency_threshold, + } + + if include_enhanced: + try: + cache = get_monitoring_cache() + dashboard_service = DashboardService(session, cache) + enhanced_stats = await dashboard_service.get_enhanced_stats( + tenant_id=tenant_id, + start_date=start_dt, + end_date=end_dt, + ) + response_data.update(enhanced_stats.to_dict()) + except Exception as e: + logger.warning(f"Failed to get enhanced stats: {e}") + response_data.update({ + "intentRuleHitRate": 0.0, + "intentRuleHitCount": 0, + "topIntentRules": [], + "promptTemplateUsageCount": 0, + "topPromptTemplates": [], + "scriptFlowActivationCount": 0, + "scriptFlowCompletionRate": 0.0, + "topScriptFlows": [], + "guardrailBlockCount": 0, + "guardrailBlockRate": 0.0, + "topGuardrailWords": [], + }) + + return JSONResponse(content=response_data) diff --git a/ai-service/app/api/admin/embedding.py b/ai-service/app/api/admin/embedding.py index 5937296..528f402 100644 --- a/ai-service/app/api/admin/embedding.py +++ b/ai-service/app/api/admin/embedding.py @@ -39,7 +39,7 @@ async def list_embedding_providers( for name in EmbeddingProviderFactory.get_available_providers(): info = EmbeddingProviderFactory.get_provider_info(name) providers.append(info) - + return {"providers": providers} @@ -66,32 +66,32 @@ async def update_embedding_config( """ provider = request.get("provider") config = request.get("config", {}) - + if not provider: raise InvalidRequestException("provider is required") - + if provider not in EmbeddingProviderFactory.get_available_providers(): raise InvalidRequestException( f"Unknown provider: {provider}. 
" f"Available: {EmbeddingProviderFactory.get_available_providers()}" ) - + manager = get_embedding_config_manager() - + old_config = manager.get_full_config() old_provider = old_config.get("provider") old_model = old_config.get("config", {}).get("model", "") - + new_model = config.get("model", "") - + try: await manager.update_config(provider, config) - + response = { "success": True, "message": f"Configuration updated to use {provider}", } - + if old_provider != provider or old_model != new_model: response["warning"] = ( "嵌入模型已更改。由于不同模型生成的向量不兼容," @@ -102,7 +102,7 @@ async def update_embedding_config( f"[EMBEDDING] Model changed from {old_provider}/{old_model} to {provider}/{new_model}. " f"Documents need to be re-uploaded." ) - + return response except EmbeddingException as e: raise InvalidRequestException(str(e)) @@ -121,15 +121,15 @@ async def test_embedding( test_text = request.get("test_text", "这是一个测试文本") config = request.get("config") provider = request.get("provider") - + manager = get_embedding_config_manager() - + result = await manager.test_connection( test_text=test_text, provider=provider, config=config, ) - + return result @@ -141,11 +141,11 @@ async def get_supported_document_formats( Get supported document formats for embedding. Returns list of supported file extensions. """ - from app.services.document import get_supported_document_formats, DocumentParserFactory - + from app.services.document import DocumentParserFactory, get_supported_document_formats + formats = get_supported_document_formats() parser_info = DocumentParserFactory.get_parser_info() - + return { "formats": formats, "parsers": parser_info, diff --git a/ai-service/app/api/admin/flow_test.py b/ai-service/app/api/admin/flow_test.py new file mode 100644 index 0000000..69eb7f3 --- /dev/null +++ b/ai-service/app/api/admin/flow_test.py @@ -0,0 +1,402 @@ +""" +Flow test API for AI Service Admin. +[AC-AISVC-93~AC-AISVC-95] Complete 12-step flow execution testing. 
+""" + +import logging +import uuid +from datetime import datetime +from typing import Any + +from fastapi import APIRouter, Depends, Header, HTTPException, Query +from pydantic import BaseModel +from sqlalchemy import desc, func, select +from sqlalchemy.ext.asyncio import AsyncSession + +from app.core.database import get_session +from app.models.entities import FlowTestRecord, FlowTestRecordStatus + +logger = logging.getLogger(__name__) + +router = APIRouter(prefix="/admin/test", tags=["Flow Test"]) + + +def get_tenant_id(x_tenant_id: str = Header(..., alias="X-Tenant-Id")) -> str: + """Extract tenant ID from header.""" + if not x_tenant_id: + raise HTTPException(status_code=400, detail="X-Tenant-Id header is required") + return x_tenant_id + + +class FlowExecutionRequest(BaseModel): + """Request for flow execution test.""" + + message: str + session_id: str | None = None + user_id: str | None = None + enable_flow: bool = True + enable_intent: bool = True + enable_rag: bool = True + enable_guardrail: bool = True + enable_memory: bool = True + compare_mode: bool = False + + +class FlowExecutionResponse(BaseModel): + """Response for flow execution test.""" + + test_id: str + session_id: str + status: str + steps: list[dict[str, Any]] + final_response: dict[str, Any] | None + total_duration_ms: int + created_at: str + + +@router.post( + "/flow-execution", + operation_id="executeFlowTest", + summary="Execute complete 12-step flow", + description="[AC-AISVC-93] Execute complete 12-step generation flow with detailed step logging.", +) +async def execute_flow_test( + request: FlowExecutionRequest, + tenant_id: str = Depends(get_tenant_id), + session: AsyncSession = Depends(get_session), +) -> FlowExecutionResponse: + """ + [AC-AISVC-93] Execute complete 12-step flow for testing. + + Steps: + 1. InputScanner - Scan input for forbidden words + 2. FlowEngine - Check if flow is active + 3. IntentRouter - Match intent rules + 4. 
QueryRewriter - Rewrite query for better retrieval + 5. MultiKBRetrieval - Retrieve from multiple knowledge bases + 6. ResultRanker - Rank and filter results + 7. PromptBuilder - Build prompt from template + 8. LLMGenerate - Generate response via LLM + 9. OutputFilter - Filter output for forbidden words + 10. Confidence - Calculate confidence score + 11. Memory - Store conversation in memory + 12. Response - Return final response + """ + import time + + from app.models import ChatRequest, ChannelType + from app.services.llm.factory import get_llm_config_manager + from app.services.memory import MemoryService + from app.services.orchestrator import OrchestratorService + from app.services.retrieval.optimized_retriever import get_optimized_retriever + + logger.info( + f"[AC-AISVC-93] Executing flow test for tenant={tenant_id}, " + f"message={request.message[:50]}..." + ) + + test_session_id = request.session_id or f"test_{uuid.uuid4().hex[:8]}" + start_time = time.time() + + memory_service = MemoryService(session) + llm_config_manager = get_llm_config_manager() + llm_client = llm_config_manager.get_client() + retriever = await get_optimized_retriever() + + orchestrator = OrchestratorService( + llm_client=llm_client, + memory_service=memory_service, + retriever=retriever, + ) + + try: + chat_request = ChatRequest( + session_id=test_session_id, + current_message=request.message, + channel_type=ChannelType.WECHAT, + history=[], + ) + + result = await orchestrator.generate( + tenant_id=tenant_id, + request=chat_request, + ) + + steps = result.metadata.get("execution_steps", []) if result.metadata else [] + total_duration_ms = int((time.time() - start_time) * 1000) + + has_failure = any(s.get("status") == "failed" for s in steps) + has_partial = any(s.get("status") == "skipped" for s in steps) + + if has_failure: + status = FlowTestRecordStatus.FAILED.value + elif has_partial: + status = FlowTestRecordStatus.PARTIAL.value + else: + status = 
FlowTestRecordStatus.SUCCESS.value + + test_record = FlowTestRecord( + tenant_id=tenant_id, + session_id=test_session_id, + status=status, + steps=steps, + final_response={ + "reply": result.reply, + "confidence": result.confidence, + "should_transfer": result.should_transfer, + }, + total_duration_ms=total_duration_ms, + ) + + try: + session.add(test_record) + await session.commit() + await session.refresh(test_record) + except Exception as db_error: + logger.warning(f"Failed to save test record: {db_error}") + await session.rollback() + + logger.info( + f"[AC-AISVC-93] Flow test completed: id={test_record.id}, " + f"status={status}, duration={total_duration_ms}ms" + ) + + return FlowExecutionResponse( + test_id=str(test_record.id), + session_id=test_session_id, + status=status, + steps=steps, + final_response=test_record.final_response, + total_duration_ms=total_duration_ms, + created_at=test_record.created_at.isoformat(), + ) + + except Exception as e: + logger.error(f"[AC-AISVC-93] Flow test failed: {e}") + + total_duration_ms = int((time.time() - start_time) * 1000) + + await session.rollback() + + test_record = FlowTestRecord( + tenant_id=tenant_id, + session_id=test_session_id, + status=FlowTestRecordStatus.FAILED.value, + steps=[{ + "step": 0, + "name": "Error", + "status": "failed", + "error": str(e), + }], + final_response=None, + total_duration_ms=total_duration_ms, + ) + session.add(test_record) + await session.commit() + await session.refresh(test_record) + + raise HTTPException(status_code=500, detail=str(e)) + + +@router.get( + "/flow-execution/{test_id}", + operation_id="getFlowTestResult", + summary="Get flow test result", + description="[AC-AISVC-94] Get detailed result of a flow execution test.", +) +async def get_flow_test_result( + test_id: uuid.UUID, + tenant_id: str = Depends(get_tenant_id), + session: AsyncSession = Depends(get_session), +) -> dict[str, Any]: + """ + [AC-AISVC-94] Get detailed result of a flow execution test. 
+ Returns step-by-step execution details for debugging. + """ + logger.info( + f"[AC-AISVC-94] Getting flow test result for tenant={tenant_id}, " + f"test_id={test_id}" + ) + + stmt = select(FlowTestRecord).where( + FlowTestRecord.id == test_id, + FlowTestRecord.tenant_id == tenant_id, + ) + result = await session.execute(stmt) + record = result.scalar_one_or_none() + + if not record: + raise HTTPException(status_code=404, detail="Test record not found") + + return { + "testId": str(record.id), + "sessionId": record.session_id, + "status": record.status, + "steps": record.steps, + "finalResponse": record.final_response, + "totalDurationMs": record.total_duration_ms, + "createdAt": record.created_at.isoformat(), + } + + +@router.get( + "/flow-executions", + operation_id="listFlowTests", + summary="List flow test records", + description="[AC-AISVC-95] List flow test execution records.", +) +async def list_flow_tests( + tenant_id: str = Depends(get_tenant_id), + session_id: str | None = Query(None, description="Filter by session ID"), + status: str | None = Query(None, description="Filter by status"), + page: int = Query(1, ge=1, description="Page number"), + page_size: int = Query(20, ge=1, le=100, description="Page size"), + session: AsyncSession = Depends(get_session), +) -> dict[str, Any]: + """ + [AC-AISVC-95] List flow test execution records. + Records are retained for 7 days. 
+ """ + logger.info( + f"[AC-AISVC-95] Listing flow tests for tenant={tenant_id}, " + f"session={session_id}, page={page}" + ) + + stmt = select(FlowTestRecord).where( + FlowTestRecord.tenant_id == tenant_id, + ) + + if session_id: + stmt = stmt.where(FlowTestRecord.session_id == session_id) + if status: + stmt = stmt.where(FlowTestRecord.status == status) + + count_stmt = select(func.count()).select_from(stmt.subquery()) + total_result = await session.execute(count_stmt) + total = total_result.scalar() or 0 + + stmt = stmt.order_by(desc(FlowTestRecord.created_at)) + stmt = stmt.offset((page - 1) * page_size).limit(page_size) + + result = await session.execute(stmt) + records = result.scalars().all() + + return { + "data": [ + { + "testId": str(r.id), + "sessionId": r.session_id, + "status": r.status, + "stepCount": len(r.steps), + "totalDurationMs": r.total_duration_ms, + "createdAt": r.created_at.isoformat(), + } + for r in records + ], + "page": page, + "pageSize": page_size, + "total": total, + } + + +class CompareRequest(BaseModel): + """Request for comparison test.""" + + message: str + baseline_config: dict[str, Any] | None = None + test_config: dict[str, Any] | None = None + + +@router.post( + "/compare", + operation_id="compareFlowTest", + summary="Compare two flow executions", + description="[AC-AISVC-95] Compare baseline and test configurations.", +) +async def compare_flow_test( + request: CompareRequest, + tenant_id: str = Depends(get_tenant_id), + session: AsyncSession = Depends(get_session), +) -> dict[str, Any]: + """ + [AC-AISVC-95] Compare two flow executions with different configurations. 
+ + Useful for: + - A/B testing prompt templates + - Comparing RAG retrieval strategies + - Testing guardrail effectiveness + """ + import time + + from app.models import ChatRequest, ChannelType + from app.services.llm.factory import get_llm_config_manager + from app.services.memory import MemoryService + from app.services.orchestrator import OrchestratorService + from app.services.retrieval.optimized_retriever import get_optimized_retriever + + logger.info( + f"[AC-AISVC-95] Running comparison test for tenant={tenant_id}" + ) + + baseline_session_id = f"compare_baseline_{uuid.uuid4().hex[:8]}" + test_session_id = f"compare_test_{uuid.uuid4().hex[:8]}" + + memory_service = MemoryService(session) + llm_config_manager = get_llm_config_manager() + llm_client = llm_config_manager.get_client() + retriever = await get_optimized_retriever() + + orchestrator = OrchestratorService( + llm_client=llm_client, + memory_service=memory_service, + retriever=retriever, + ) + + baseline_chat_request = ChatRequest( + session_id=baseline_session_id, + current_message=request.message, + channel_type=ChannelType.WECHAT, + history=[], + ) + + baseline_start = time.time() + baseline_result = await orchestrator.generate( + tenant_id=tenant_id, + request=baseline_chat_request, + ) + baseline_duration = int((time.time() - baseline_start) * 1000) + + test_chat_request = ChatRequest( + session_id=test_session_id, + current_message=request.message, + channel_type=ChannelType.WECHAT, + history=[], + ) + + test_start = time.time() + test_result = await orchestrator.generate( + tenant_id=tenant_id, + request=test_chat_request, + ) + test_duration = int((time.time() - test_start) * 1000) + + return { + "baseline": { + "sessionId": baseline_session_id, + "reply": baseline_result.reply, + "confidence": baseline_result.confidence, + "durationMs": baseline_duration, + "steps": baseline_result.metadata.get("execution_steps", []) if baseline_result.metadata else [], + }, + "test": { + "sessionId": 
test_session_id, + "reply": test_result.reply, + "confidence": test_result.confidence, + "durationMs": test_duration, + "steps": test_result.metadata.get("execution_steps", []) if test_result.metadata else [], + }, + "comparison": { + "durationDiffMs": test_duration - baseline_duration, + "confidenceDiff": (test_result.confidence or 0) - (baseline_result.confidence or 0), + }, + } diff --git a/ai-service/app/api/admin/kb.py b/ai-service/app/api/admin/kb.py index e185e0e..b36edd0 100644 --- a/ai-service/app/api/admin/kb.py +++ b/ai-service/app/api/admin/kb.py @@ -1,16 +1,16 @@ """ Knowledge Base management endpoints. [AC-ASA-01, AC-ASA-02, AC-ASA-08] Document upload, list, and index job status. +[AC-AISVC-59~AC-AISVC-64] Multi-knowledge-base management. """ import logging -import os import uuid from dataclasses import dataclass from typing import Annotated, Optional import tiktoken -from fastapi import APIRouter, BackgroundTasks, Depends, Query, UploadFile, File, Form +from fastapi import APIRouter, BackgroundTasks, Depends, File, Form, Query, UploadFile from fastapi.responses import JSONResponse from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession @@ -19,8 +19,15 @@ from app.core.database import get_session from app.core.exceptions import MissingTenantIdException from app.core.tenant import get_tenant_id from app.models import ErrorResponse -from app.models.entities import DocumentStatus, IndexJob, IndexJobStatus +from app.models.entities import ( + IndexJob, + IndexJobStatus, + KBType, + KnowledgeBaseCreate, + KnowledgeBaseUpdate, +) from app.services.kb import KBService +from app.services.knowledge_base_service import KnowledgeBaseService logger = logging.getLogger(__name__) @@ -44,24 +51,24 @@ def chunk_text_by_lines( ) -> list[TextChunk]: """ 按行分块,每行作为一个独立的检索单元。 - + Args: text: 要分块的文本 min_line_length: 最小行长度,低于此长度的行会被跳过 source: 来源文件路径(可选) - + Returns: 分块列表,每个块对应一行文本 """ lines = text.split('\n') chunks: list[TextChunk] = [] - + for i, 
line in enumerate(lines): line = line.strip() - + if len(line) < min_line_length: continue - + chunks.append(TextChunk( text=line, start_token=i, @@ -69,7 +76,7 @@ def chunk_text_by_lines( page=None, source=source, )) - + return chunks @@ -82,14 +89,14 @@ def chunk_text_with_tiktoken( ) -> list[TextChunk]: """ 使用 tiktoken 按 token 数分块,支持重叠分块。 - + Args: text: 要分块的文本 chunk_size: 每个块的最大 token 数 overlap: 块之间的重叠 token 数 page: 页码(可选) source: 来源文件路径(可选) - + Returns: 分块列表,每个块包含文本及起始/结束位置 """ @@ -97,7 +104,7 @@ def chunk_text_with_tiktoken( tokens = encoding.encode(text) chunks: list[TextChunk] = [] start = 0 - + while start < len(tokens): end = min(start + chunk_size, len(tokens)) chunk_tokens = tokens[start:end] @@ -112,7 +119,7 @@ def chunk_text_with_tiktoken( if end == len(tokens): break start += chunk_size - overlap - + return chunks @@ -128,7 +135,7 @@ def get_current_tenant_id() -> str: "/knowledge-bases", operation_id="listKnowledgeBases", summary="Query knowledge base list", - description="Get list of knowledge bases for the current tenant.", + description="[AC-AISVC-60] Get list of knowledge bases for the current tenant with type and status filters.", responses={ 200: {"description": "Knowledge base list"}, 401: {"description": "Unauthorized", "model": ErrorResponse}, @@ -138,41 +145,264 @@ def get_current_tenant_id() -> str: async def list_knowledge_bases( tenant_id: Annotated[str, Depends(get_current_tenant_id)], session: Annotated[AsyncSession, Depends(get_session)], + kb_type: Annotated[Optional[str], Query()] = None, + is_enabled: Annotated[Optional[bool], Query()] = None, ) -> JSONResponse: """ - List all knowledge bases for the current tenant. + [AC-AISVC-60] List all knowledge bases for the current tenant. + Supports filtering by kb_type and is_enabled status. 
""" - logger.info(f"Listing knowledge bases: tenant={tenant_id}") + try: + logger.info(f"[AC-AISVC-60] Listing knowledge bases: tenant={tenant_id}, kb_type={kb_type}, is_enabled={is_enabled}") - kb_service = KBService(session) - knowledge_bases = await kb_service.list_knowledge_bases(tenant_id) - - kb_ids = [str(kb.id) for kb in knowledge_bases] - - doc_counts = {} - if kb_ids: - from sqlalchemy import func - from app.models.entities import Document - count_stmt = ( - select(Document.kb_id, func.count(Document.id).label("count")) - .where(Document.tenant_id == tenant_id, Document.kb_id.in_(kb_ids)) - .group_by(Document.kb_id) + kb_service = KnowledgeBaseService(session) + logger.info(f"[AC-AISVC-60] KnowledgeBaseService created, calling list_knowledge_bases...") + knowledge_bases = await kb_service.list_knowledge_bases( + tenant_id=tenant_id, + kb_type=kb_type, + is_enabled=is_enabled, ) - count_result = await session.execute(count_stmt) - for row in count_result: - doc_counts[row.kb_id] = row.count + logger.info(f"[AC-AISVC-60] Found {len(knowledge_bases)} knowledge bases") - data = [] - for kb in knowledge_bases: - kb_id_str = str(kb.id) - data.append({ - "id": kb_id_str, + data = [] + for kb in knowledge_bases: + data.append({ + "id": str(kb.id), + "name": kb.name, + "kbType": kb.kb_type, + "description": kb.description, + "priority": kb.priority, + "isEnabled": kb.is_enabled, + "docCount": kb.doc_count, + "createdAt": kb.created_at.isoformat() + "Z", + "updatedAt": kb.updated_at.isoformat() + "Z", + }) + + logger.info(f"[AC-AISVC-60] Returning {len(data)} knowledge bases") + return JSONResponse(content={"data": data}) + except Exception as e: + import traceback + logger.error(f"[AC-AISVC-60] Error listing knowledge bases: {type(e).__name__}: {e}\n{traceback.format_exc()}") + raise + + +@router.post( + "/knowledge-bases", + operation_id="createKnowledgeBase", + summary="Create knowledge base", + description="[AC-AISVC-59] Create a new knowledge base with 
specified type and priority.", + responses={ + 201: {"description": "Knowledge base created"}, + 400: {"description": "Bad Request - invalid kb_type"}, + 401: {"description": "Unauthorized", "model": ErrorResponse}, + 403: {"description": "Forbidden", "model": ErrorResponse}, + }, + status_code=201, +) +async def create_knowledge_base( + tenant_id: Annotated[str, Depends(get_current_tenant_id)], + session: Annotated[AsyncSession, Depends(get_session)], + kb_create: KnowledgeBaseCreate, +) -> JSONResponse: + """ + [AC-AISVC-59] Create a new knowledge base. + Initializes corresponding Qdrant Collection. + """ + valid_types = [t.value for t in KBType] + if kb_create.kb_type not in valid_types: + return JSONResponse( + status_code=400, + content={ + "code": "INVALID_KB_TYPE", + "message": f"Invalid kb_type: {kb_create.kb_type}", + "details": {"valid_types": valid_types}, + }, + ) + + logger.info( + f"[AC-AISVC-59] Creating knowledge base: tenant={tenant_id}, " + f"name={kb_create.name}, type={kb_create.kb_type}" + ) + + kb_service = KnowledgeBaseService(session) + kb = await kb_service.create_knowledge_base(tenant_id, kb_create) + await session.commit() + + return JSONResponse( + status_code=201, + content={ + "id": str(kb.id), "name": kb.name, - "documentCount": doc_counts.get(kb_id_str, 0), + "kbType": kb.kb_type, + "description": kb.description, + "priority": kb.priority, + "isEnabled": kb.is_enabled, + "docCount": kb.doc_count, "createdAt": kb.created_at.isoformat() + "Z", - }) + "updatedAt": kb.updated_at.isoformat() + "Z", + }, + ) - return JSONResponse(content={"data": data}) + +@router.get( + "/knowledge-bases/{kb_id}", + operation_id="getKnowledgeBase", + summary="Get knowledge base details", + description="Get detailed information about a specific knowledge base.", + responses={ + 200: {"description": "Knowledge base details"}, + 404: {"description": "Knowledge base not found"}, + 401: {"description": "Unauthorized", "model": ErrorResponse}, + 403: 
{"description": "Forbidden", "model": ErrorResponse}, + }, +) +async def get_knowledge_base( + tenant_id: Annotated[str, Depends(get_current_tenant_id)], + session: Annotated[AsyncSession, Depends(get_session)], + kb_id: str, +) -> JSONResponse: + """ + Get a specific knowledge base by ID. + """ + logger.info(f"Getting knowledge base: tenant={tenant_id}, kb_id={kb_id}") + + kb_service = KnowledgeBaseService(session) + kb = await kb_service.get_knowledge_base(tenant_id, kb_id) + + if not kb: + return JSONResponse( + status_code=404, + content={ + "code": "KB_NOT_FOUND", + "message": f"Knowledge base {kb_id} not found", + }, + ) + + return JSONResponse( + content={ + "id": str(kb.id), + "name": kb.name, + "kbType": kb.kb_type, + "description": kb.description, + "priority": kb.priority, + "isEnabled": kb.is_enabled, + "docCount": kb.doc_count, + "createdAt": kb.created_at.isoformat() + "Z", + "updatedAt": kb.updated_at.isoformat() + "Z", + } + ) + + +@router.put( + "/knowledge-bases/{kb_id}", + operation_id="updateKnowledgeBase", + summary="Update knowledge base", + description="[AC-AISVC-61] Update knowledge base name, type, description, priority, or enabled status.", + responses={ + 200: {"description": "Knowledge base updated"}, + 400: {"description": "Bad Request - invalid kb_type"}, + 404: {"description": "Knowledge base not found"}, + 401: {"description": "Unauthorized", "model": ErrorResponse}, + 403: {"description": "Forbidden", "model": ErrorResponse}, + }, +) +async def update_knowledge_base( + tenant_id: Annotated[str, Depends(get_current_tenant_id)], + session: Annotated[AsyncSession, Depends(get_session)], + kb_id: str, + kb_update: KnowledgeBaseUpdate, +) -> JSONResponse: + """ + [AC-AISVC-61] Update a knowledge base. 
+ """ + if kb_update.kb_type is not None: + valid_types = [t.value for t in KBType] + if kb_update.kb_type not in valid_types: + return JSONResponse( + status_code=400, + content={ + "code": "INVALID_KB_TYPE", + "message": f"Invalid kb_type: {kb_update.kb_type}", + "details": {"valid_types": valid_types}, + }, + ) + + logger.info( + f"[AC-AISVC-61] Updating knowledge base: tenant={tenant_id}, kb_id={kb_id}" + ) + + kb_service = KnowledgeBaseService(session) + kb = await kb_service.update_knowledge_base(tenant_id, kb_id, kb_update) + + if not kb: + return JSONResponse( + status_code=404, + content={ + "code": "KB_NOT_FOUND", + "message": f"Knowledge base {kb_id} not found", + }, + ) + + await session.commit() + + return JSONResponse( + content={ + "id": str(kb.id), + "name": kb.name, + "kbType": kb.kb_type, + "description": kb.description, + "priority": kb.priority, + "isEnabled": kb.is_enabled, + "docCount": kb.doc_count, + "createdAt": kb.created_at.isoformat() + "Z", + "updatedAt": kb.updated_at.isoformat() + "Z", + } + ) + + +@router.delete( + "/knowledge-bases/{kb_id}", + operation_id="deleteKnowledgeBase", + summary="Delete knowledge base", + description="[AC-AISVC-62] Delete a knowledge base and its associated documents and Qdrant Collection.", + responses={ + 204: {"description": "Knowledge base deleted"}, + 404: {"description": "Knowledge base not found"}, + 401: {"description": "Unauthorized", "model": ErrorResponse}, + 403: {"description": "Forbidden", "model": ErrorResponse}, + }, +) +async def delete_knowledge_base( + tenant_id: Annotated[str, Depends(get_current_tenant_id)], + session: Annotated[AsyncSession, Depends(get_session)], + kb_id: str, +) -> JSONResponse: + """ + [AC-AISVC-62] Delete a knowledge base. + Also deletes associated documents and Qdrant Collection. 
+ """ + logger.info( + f"[AC-AISVC-62] Deleting knowledge base: tenant={tenant_id}, kb_id={kb_id}" + ) + + kb_service = KnowledgeBaseService(session) + deleted = await kb_service.delete_knowledge_base(tenant_id, kb_id) + + if not deleted: + return JSONResponse( + status_code=404, + content={ + "code": "KB_NOT_FOUND", + "message": f"Knowledge base {kb_id} not found", + }, + ) + + await session.commit() + + return JSONResponse( + status_code=204, + content=None, + ) @router.get( @@ -221,7 +451,7 @@ async def list_documents( ).order_by(IndexJob.created_at.desc()) job_result = await session.execute(job_stmt) latest_job = job_result.scalar_one_or_none() - + data.append({ "docId": str(doc.id), "kbId": doc.kb_id, @@ -249,10 +479,10 @@ async def list_documents( "/documents", operation_id="uploadDocument", summary="Upload/import document", - description="[AC-ASA-01] Upload document and trigger indexing job.", + description="[AC-ASA-01, AC-AISVC-63] Upload document to specified knowledge base and trigger indexing job.", responses={ 202: {"description": "Accepted - async indexing job started"}, - 400: {"description": "Bad Request - unsupported format"}, + 400: {"description": "Bad Request - unsupported format or invalid kb_id"}, 401: {"description": "Unauthorized", "model": ErrorResponse}, 403: {"description": "Forbidden", "model": ErrorResponse}, }, @@ -265,20 +495,21 @@ async def upload_document( kb_id: str = Form(...), ) -> JSONResponse: """ - [AC-ASA-01] Upload document and create indexing job. - [AC-AISVC-33, AC-AISVC-34, AC-AISVC-35, AC-AISVC-37] Support multiple document formats. + [AC-ASA-01, AC-AISVC-63] Upload document to specified knowledge base. + Creates KB if not exists, indexes to corresponding Qdrant Collection. 
""" - from app.services.document import get_supported_document_formats, UnsupportedFormatError from pathlib import Path - + + from app.services.document import get_supported_document_formats + logger.info( - f"[AC-ASA-01] Uploading document: tenant={tenant_id}, " + f"[AC-AISVC-63] Uploading document: tenant={tenant_id}, " f"kb_id={kb_id}, filename={file.filename}" ) file_ext = Path(file.filename or "").suffix.lower() supported_formats = get_supported_document_formats() - + if file_ext and file_ext not in supported_formats: return JSONResponse( status_code=400, @@ -291,23 +522,35 @@ async def upload_document( }, ) - kb_service = KBService(session) + kb_service = KnowledgeBaseService(session) - kb = await kb_service.get_or_create_kb(tenant_id, kb_id) + try: + kb = await kb_service.get_knowledge_base(tenant_id, kb_id) + if not kb: + kb = await kb_service.get_or_create_default_kb(tenant_id) + kb_id = str(kb.id) + logger.info(f"[AC-AISVC-63] KB not found, using default: {kb_id}") + else: + kb_id = str(kb.id) + except Exception: + kb = await kb_service.get_or_create_default_kb(tenant_id) + kb_id = str(kb.id) + doc_kb_service = KBService(session) file_content = await file.read() - document, job = await kb_service.upload_document( + document, job = await doc_kb_service.upload_document( tenant_id=tenant_id, - kb_id=str(kb.id), + kb_id=kb_id, file_name=file.filename or "unknown", file_content=file_content, file_type=file.content_type, ) - + + await kb_service.update_doc_count(tenant_id, kb_id, delta=1) await session.commit() background_tasks.add_task( - _index_document, tenant_id, str(job.id), str(document.id), file_content, file.filename + _index_document, tenant_id, kb_id, str(job.id), str(document.id), file_content, file.filename ) return JSONResponse( @@ -315,27 +558,38 @@ async def upload_document( content={ "jobId": str(job.id), "docId": str(document.id), + "kbId": kb_id, "status": job.status, }, ) -async def _index_document(tenant_id: str, job_id: str, doc_id: str, 
content: bytes, filename: str | None = None): +async def _index_document( + tenant_id: str, + kb_id: str, + job_id: str, + doc_id: str, + content: bytes, + filename: str | None = None, +): """ Background indexing task. - [AC-AISVC-33, AC-AISVC-34, AC-AISVC-35] Uses document parsing and pluggable embedding. + [AC-AISVC-33, AC-AISVC-34, AC-AISVC-35, AC-AISVC-63] Uses document parsing and pluggable embedding. + Indexes to the specified knowledge base's Qdrant Collection. """ - from app.core.database import async_session_maker - from app.services.kb import KBService - from app.core.qdrant_client import get_qdrant_client - from app.services.embedding import get_embedding_provider - from app.services.document import parse_document, UnsupportedFormatError, DocumentParseException, PageText - from qdrant_client.models import PointStruct import asyncio import tempfile from pathlib import Path - logger.info(f"[INDEX] Starting indexing: tenant={tenant_id}, job_id={job_id}, doc_id={doc_id}, filename={filename}") + from qdrant_client.models import PointStruct + + from app.core.database import async_session_maker + from app.core.qdrant_client import get_qdrant_client + from app.services.document import DocumentParseException, UnsupportedFormatError, parse_document + from app.services.embedding import get_embedding_provider + from app.services.kb import KBService + + logger.info(f"[INDEX] Starting indexing: tenant={tenant_id}, kb_id={kb_id}, job_id={job_id}, doc_id={doc_id}, filename={filename}") await asyncio.sleep(1) async with async_session_maker() as session: @@ -350,11 +604,11 @@ async def _index_document(tenant_id: str, job_id: str, doc_id: str, content: byt text = None file_ext = Path(filename or "").suffix.lower() logger.info(f"[INDEX] File extension: {file_ext}, content size: {len(content)} bytes") - + text_extensions = {".txt", ".md", ".markdown", ".rst", ".log", ".json", ".xml", ".yaml", ".yml"} - + if file_ext in text_extensions or not file_ext: - logger.info(f"[INDEX] 
Treating as text file, trying multiple encodings") + logger.info("[INDEX] Treating as text file, trying multiple encodings") text = None for encoding in ["utf-8", "gbk", "gb2312", "gb18030", "big5", "utf-16", "latin-1"]: try: @@ -363,23 +617,23 @@ async def _index_document(tenant_id: str, job_id: str, doc_id: str, content: byt break except (UnicodeDecodeError, LookupError): continue - + if text is None: text = content.decode("utf-8", errors="replace") - logger.warning(f"[INDEX] Failed to decode with known encodings, using utf-8 with replacement") + logger.warning("[INDEX] Failed to decode with known encodings, using utf-8 with replacement") else: - logger.info(f"[INDEX] Binary file detected, will parse with document parser") + logger.info("[INDEX] Binary file detected, will parse with document parser") await kb_service.update_job_status( tenant_id, job_id, IndexJobStatus.PROCESSING.value, progress=15 ) await session.commit() - + with tempfile.NamedTemporaryFile(delete=False, suffix=file_ext) as tmp_file: tmp_file.write(content) tmp_path = tmp_file.name - + logger.info(f"[INDEX] Temp file created: {tmp_path}") - + try: logger.info(f"[INDEX] Starting document parsing for {file_ext}...") parse_result = parse_document(tmp_path) @@ -403,23 +657,23 @@ async def _index_document(tenant_id: str, job_id: str, doc_id: str, content: byt text = content.decode("utf-8", errors="ignore") finally: Path(tmp_path).unlink(missing_ok=True) - logger.info(f"[INDEX] Temp file cleaned up") - + logger.info("[INDEX] Temp file cleaned up") + logger.info(f"[INDEX] Final text length: {len(text)} chars") if len(text) < 50: logger.warning(f"[INDEX] Text too short, preview: {repr(text[:200])}") - + await kb_service.update_job_status( tenant_id, job_id, IndexJobStatus.PROCESSING.value, progress=20 ) await session.commit() - logger.info(f"[INDEX] Getting embedding provider...") + logger.info("[INDEX] Getting embedding provider...") embedding_provider = await get_embedding_provider() 
logger.info(f"[INDEX] Embedding provider: {type(embedding_provider).__name__}") - + all_chunks: list[TextChunk] = [] - + if parse_result and parse_result.pages: logger.info(f"[INDEX] PDF with {len(parse_result.pages)} pages, using line-based chunking with page metadata") for page in parse_result.pages: @@ -433,7 +687,7 @@ async def _index_document(tenant_id: str, job_id: str, doc_id: str, content: byt all_chunks.extend(page_chunks) logger.info(f"[INDEX] Total chunks from PDF: {len(all_chunks)}") else: - logger.info(f"[INDEX] Using line-based chunking") + logger.info("[INDEX] Using line-based chunking") all_chunks = chunk_text_by_lines( text, min_line_length=10, @@ -442,7 +696,7 @@ async def _index_document(tenant_id: str, job_id: str, doc_id: str, content: byt logger.info(f"[INDEX] Total chunks: {len(all_chunks)}") qdrant = await get_qdrant_client() - await qdrant.ensure_collection_exists(tenant_id, use_multi_vector=True) + await qdrant.ensure_kb_collection_exists(tenant_id, kb_id, use_multi_vector=True) from app.services.embedding.nomic_provider import NomicEmbeddingProvider use_multi_vector = isinstance(embedding_provider, NomicEmbeddingProvider) @@ -454,6 +708,7 @@ async def _index_document(tenant_id: str, job_id: str, doc_id: str, content: byt payload = { "text": chunk.text, "source": doc_id, + "kb_id": kb_id, "chunk_index": i, "start_token": chunk.start_token, "end_token": chunk.end_token, @@ -462,7 +717,7 @@ async def _index_document(tenant_id: str, job_id: str, doc_id: str, content: byt payload["page"] = chunk.page if chunk.source: payload["filename"] = chunk.source - + if use_multi_vector: embedding_result = await embedding_provider.embed_document(chunk.text) points.append({ @@ -483,7 +738,7 @@ async def _index_document(tenant_id: str, job_id: str, doc_id: str, content: byt payload=payload, ) ) - + progress = 20 + int((i + 1) / total_chunks * 70) if i % 10 == 0 or i == total_chunks - 1: await kb_service.update_job_status( @@ -492,11 +747,11 @@ async def 
_index_document(tenant_id: str, job_id: str, doc_id: str, content: byt await session.commit() if points: - logger.info(f"[INDEX] Upserting {len(points)} vectors to Qdrant...") + logger.info(f"[INDEX] Upserting {len(points)} vectors to Qdrant for kb_id={kb_id}...") if use_multi_vector: - await qdrant.upsert_multi_vector(tenant_id, points) + await qdrant.upsert_multi_vector(tenant_id, points, kb_id=kb_id) else: - await qdrant.upsert_vectors(tenant_id, points) + await qdrant.upsert_vectors(tenant_id, points, kb_id=kb_id) await kb_service.update_job_status( tenant_id, job_id, IndexJobStatus.COMPLETED.value, progress=100 @@ -504,7 +759,7 @@ async def _index_document(tenant_id: str, job_id: str, doc_id: str, content: byt await session.commit() logger.info( - f"[INDEX] COMPLETED: tenant={tenant_id}, " + f"[INDEX] COMPLETED: tenant={tenant_id}, kb_id={kb_id}, " f"job_id={job_id}, chunks={len(all_chunks)}, text_len={len(text)}" ) diff --git a/ai-service/app/api/admin/kb_optimized.py b/ai-service/app/api/admin/kb_optimized.py index 9bdfe2f..5775fd2 100644 --- a/ai-service/app/api/admin/kb_optimized.py +++ b/ai-service/app/api/admin/kb_optimized.py @@ -4,7 +4,6 @@ Reference: rag-optimization/spec.md Section 4.2 """ import logging -from datetime import date from typing import Any from fastapi import APIRouter, Depends, HTTPException, status @@ -16,9 +15,6 @@ from app.core.database import get_session from app.services.retrieval import ( ChunkMetadata, ChunkMetadataModel, - IndexingProgress, - IndexingResult, - KnowledgeIndexer, MetadataFilter, RetrievalStrategy, get_knowledge_indexer, @@ -100,7 +96,7 @@ async def index_document( ): """ Index a document with optimized embedding. 
- + Features: - Task prefixes (search_document:) for document embedding - Multi-dimensional vectors (256/512/768) @@ -108,7 +104,7 @@ async def index_document( """ try: index = get_knowledge_indexer() - + chunk_metadata = None if request.metadata: chunk_metadata = ChunkMetadata( @@ -121,14 +117,14 @@ async def index_document( priority=request.metadata.priority, keywords=request.metadata.keywords, ) - + result = await index.index_document( tenant_id=request.tenant_id, document_id=request.document_id, text=request.text, metadata=chunk_metadata, ) - + return IndexDocumentResponse( success=result.success, total_chunks=result.total_chunks, @@ -137,7 +133,7 @@ async def index_document( elapsed_seconds=result.elapsed_seconds, error_message=result.error_message, ) - + except Exception as e: logger.error(f"[KB-API] Failed to index document: {e}") raise HTTPException( @@ -152,10 +148,10 @@ async def get_indexing_progress(): try: index = get_knowledge_indexer() progress = index.get_progress() - + if progress is None: return None - + return IndexingProgressResponse( total_chunks=progress.total_chunks, processed_chunks=progress.processed_chunks, @@ -164,7 +160,7 @@ async def get_indexing_progress(): elapsed_seconds=progress.elapsed_seconds, current_document=progress.current_document, ) - + except Exception as e: logger.error(f"[KB-API] Failed to get progress: {e}") raise HTTPException( @@ -177,7 +173,7 @@ async def get_indexing_progress(): async def retrieve_knowledge(request: RetrieveRequest): """ Retrieve knowledge using optimized RAG. 
- + Strategies: - vector: Simple vector search - bm25: BM25 keyword search @@ -185,26 +181,26 @@ async def retrieve_knowledge(request: RetrieveRequest): - two_stage: Two-stage retrieval with Matryoshka dimensions """ try: - from app.services.retrieval.optimized_retriever import get_optimized_retriever from app.services.retrieval.base import RetrievalContext - + from app.services.retrieval.optimized_retriever import get_optimized_retriever + retriever = await get_optimized_retriever() - + metadata_filter = None if request.filters: filter_dict = request.filters.model_dump(exclude_none=True) metadata_filter = MetadataFilter(**filter_dict) - + ctx = RetrievalContext( tenant_id=request.tenant_id, query=request.query, ) - + if metadata_filter: ctx.metadata = {"filter": metadata_filter.to_qdrant_filter()} - + result = await retriever.retrieve(ctx) - + return RetrieveResponse( hits=[ { @@ -220,7 +216,7 @@ async def retrieve_knowledge(request: RetrieveRequest): is_insufficient=result.diagnostics.get("is_insufficient", False), diagnostics=result.diagnostics or {}, ) - + except Exception as e: logger.error(f"[KB-API] Failed to retrieve: {e}") raise HTTPException( @@ -266,7 +262,7 @@ async def get_metadata_options(): ], priorities=list(range(1, 11)), ) - + except Exception as e: logger.error(f"[KB-API] Failed to get metadata options: {e}") raise HTTPException( @@ -286,42 +282,42 @@ async def reindex_all( """ try: from app.models.entities import Document, DocumentStatus - + stmt = select(Document).where( Document.tenant_id == tenant_id, Document.status == DocumentStatus.COMPLETED.value, ) result = await session.execute(stmt) documents = result.scalars().all() - + index = get_knowledge_indexer() - + total_indexed = 0 total_failed = 0 - + for doc in documents: if doc.file_path: import os if os.path.exists(doc.file_path): - with open(doc.file_path, 'r', encoding='utf-8') as f: + with open(doc.file_path, encoding='utf-8') as f: text = f.read() - + result = await 
index.index_document( tenant_id=tenant_id, document_id=str(doc.id), text=text, ) - + total_indexed += result.indexed_chunks total_failed += result.failed_chunks - + return { "success": True, "total_documents": len(documents), "total_indexed": total_indexed, "total_failed": total_failed, } - + except Exception as e: logger.error(f"[KB-API] Failed to reindex: {e}") raise HTTPException( diff --git a/ai-service/app/api/admin/llm.py b/ai-service/app/api/admin/llm.py index fa37891..e862f92 100644 --- a/ai-service/app/api/admin/llm.py +++ b/ai-service/app/api/admin/llm.py @@ -9,7 +9,6 @@ from typing import Any from fastapi import APIRouter, Depends, Header, HTTPException from app.services.llm.factory import ( - LLMConfigManager, LLMProviderFactory, get_llm_config_manager, ) diff --git a/ai-service/app/api/admin/monitoring.py b/ai-service/app/api/admin/monitoring.py index 76de0d7..eccc313 100644 --- a/ai-service/app/api/admin/monitoring.py +++ b/ai-service/app/api/admin/monitoring.py @@ -33,6 +33,7 @@ from app.models.entities import ( from app.services.monitoring.flow_monitor import FlowMonitor from app.services.monitoring.guardrail_monitor import GuardrailMonitor from app.services.monitoring.intent_monitor import IntentMonitor +from app.services.monitoring.prompt_monitor import PromptMonitor logger = logging.getLogger(__name__) @@ -190,6 +191,28 @@ async def get_guardrail_blocks( } +@router.get("/prompt-templates") +async def get_prompt_template_stats( + tenant_id: str = Depends(get_tenant_id), + start_date: datetime | None = Query(None, description="Start date filter"), + end_date: datetime | None = Query(None, description="End date filter"), + scene: str | None = Query(None, description="Scene filter"), + session: AsyncSession = Depends(get_session), +) -> dict[str, Any]: + """ + [AC-AISVC-100] Get aggregated statistics for all prompt templates. 
+ """ + logger.info( + f"[AC-AISVC-100] Getting prompt template stats for tenant={tenant_id}, " + f"start={start_date}, end={end_date}, scene={scene}" + ) + + monitor = PromptMonitor(session) + result = await monitor.get_template_stats(tenant_id, scene, start_date, end_date) + + return result.to_dict() + + @router.get("/conversations") async def list_conversations( tenant_id: str = Depends(get_tenant_id), diff --git a/ai-service/app/api/admin/rag.py b/ai-service/app/api/admin/rag.py index e5b46f1..6b75b70 100644 --- a/ai-service/app/api/admin/rag.py +++ b/ai-service/app/api/admin/rag.py @@ -6,21 +6,20 @@ RAG Lab endpoints for debugging and experimentation. import json import logging import time -from typing import Annotated, Any, List +from typing import Annotated, Any -from fastapi import APIRouter, Depends, Body +from fastapi import APIRouter, Body, Depends from fastapi.responses import JSONResponse, StreamingResponse from pydantic import BaseModel, Field from app.core.config import get_settings from app.core.exceptions import MissingTenantIdException -from app.core.prompts import format_evidence_for_prompt, build_user_prompt_with_evidence +from app.core.prompts import build_user_prompt_with_evidence, format_evidence_for_prompt from app.core.tenant import get_tenant_id from app.models import ErrorResponse -from app.services.retrieval.vector_retriever import get_vector_retriever -from app.services.retrieval.optimized_retriever import get_optimized_retriever -from app.services.retrieval.base import RetrievalContext from app.services.llm.factory import get_llm_config_manager +from app.services.retrieval.base import RetrievalContext +from app.services.retrieval.optimized_retriever import get_optimized_retriever logger = logging.getLogger(__name__) @@ -37,7 +36,7 @@ def get_current_tenant_id() -> str: class RAGExperimentRequest(BaseModel): query: str = Field(..., description="Query text for retrieval") - kb_ids: List[str] | None = Field(default=None, 
description="Knowledge base IDs to search") + kb_ids: list[str] | None = Field(default=None, description="Knowledge base IDs to search") top_k: int = Field(default=5, description="Number of results to retrieve") score_threshold: float = Field(default=0.5, description="Minimum similarity score") generate_response: bool = Field(default=True, description="Whether to generate AI response") @@ -55,7 +54,7 @@ class AIResponse(BaseModel): class RAGExperimentResult(BaseModel): query: str - retrieval_results: List[dict] = [] + retrieval_results: list[dict] = [] final_prompt: str = "" ai_response: AIResponse | None = None total_latency_ms: float = 0 @@ -227,10 +226,10 @@ async def run_rag_experiment_stream( final_prompt = _build_final_prompt(request.query, retrieval_results) - logger.info(f"[AC-ASA-20] ========== RAG LAB STREAM FULL PROMPT ==========") + logger.info("[AC-ASA-20] ========== RAG LAB STREAM FULL PROMPT ==========") logger.info(f"[AC-ASA-20] Prompt length: {len(final_prompt)}") logger.info(f"[AC-ASA-20] Prompt content:\n{final_prompt}") - logger.info(f"[AC-ASA-20] ==============================================") + logger.info("[AC-ASA-20] ==============================================") yield f"event: retrieval\ndata: {json.dumps({'results': retrieval_results, 'count': len(retrieval_results)})}\n\n" @@ -276,10 +275,10 @@ async def _generate_ai_response( """ import time - logger.info(f"[AC-ASA-19] ========== RAG LAB FULL PROMPT ==========") + logger.info("[AC-ASA-19] ========== RAG LAB FULL PROMPT ==========") logger.info(f"[AC-ASA-19] Prompt length: {len(prompt)}") logger.info(f"[AC-ASA-19] Prompt content:\n{prompt}") - logger.info(f"[AC-ASA-19] ==========================================") + logger.info("[AC-ASA-19] ==========================================") try: manager = get_llm_config_manager() diff --git a/ai-service/app/api/admin/sessions.py b/ai-service/app/api/admin/sessions.py index e794fcb..29b7b3f 100644 --- a/ai-service/app/api/admin/sessions.py +++ 
b/ai-service/app/api/admin/sessions.py @@ -4,12 +4,13 @@ Session monitoring and management endpoints. """ import logging -from typing import Annotated, Optional, Sequence +from collections.abc import Sequence from datetime import datetime +from typing import Annotated, Optional from fastapi import APIRouter, Depends, Query from fastapi.responses import JSONResponse -from sqlalchemy import select, func +from sqlalchemy import func, select from sqlalchemy.ext.asyncio import AsyncSession from sqlmodel import col @@ -17,7 +18,7 @@ from app.core.database import get_session from app.core.exceptions import MissingTenantIdException from app.core.tenant import get_tenant_id from app.models import ErrorResponse -from app.models.entities import ChatSession, ChatMessage, SessionStatus +from app.models.entities import ChatMessage, ChatSession, SessionStatus logger = logging.getLogger(__name__) diff --git a/ai-service/app/api/chat.py b/ai-service/app/api/chat.py index 97ed5e6..c66a8d8 100644 --- a/ai-service/app/api/chat.py +++ b/ai-service/app/api/chat.py @@ -8,14 +8,14 @@ from typing import Annotated, Any from fastapi import APIRouter, Depends, Header, Request from fastapi.responses import JSONResponse -from sse_starlette.sse import EventSourceResponse from sqlalchemy.ext.asyncio import AsyncSession +from sse_starlette.sse import EventSourceResponse from app.core.database import get_session from app.core.middleware import get_response_mode, is_sse_request from app.core.sse import SSEStateMachine, create_error_event from app.core.tenant import get_tenant_id -from app.models import ChatRequest, ChatResponse, ErrorResponse +from app.models import ChatRequest, ErrorResponse from app.services.memory import MemoryService from app.services.orchestrator import OrchestratorService @@ -33,12 +33,12 @@ async def get_orchestrator_service_with_memory( """ from app.services.llm.factory import get_llm_config_manager from app.services.retrieval.optimized_retriever import 
get_optimized_retriever - + memory_service = MemoryService(session) llm_config_manager = get_llm_config_manager() llm_client = llm_config_manager.get_client() retriever = await get_optimized_retriever() - + return OrchestratorService( llm_client=llm_client, memory_service=memory_service, @@ -52,7 +52,7 @@ async def get_orchestrator_service_with_memory( summary="Generate AI reply", description=""" [AC-AISVC-01, AC-AISVC-02, AC-AISVC-06] Generate AI reply based on user message. - + Response mode is determined by Accept header: - Accept: text/event-stream -> SSE streaming response - Other -> JSON response @@ -78,7 +78,7 @@ async def generate_reply( ) -> Any: """ [AC-AISVC-06] Generate AI reply with automatic response mode switching. - + Based on Accept header: - text/event-stream: Returns SSE stream with message/final/error events - Other: Returns JSON ChatResponse @@ -134,11 +134,11 @@ async def _handle_streaming_request( ) -> EventSourceResponse: """ [AC-AISVC-06, AC-AISVC-07, AC-AISVC-08, AC-AISVC-09] Handle SSE streaming request. - + SSE Event Sequence (per design.md Section 6.2): - message* (0 or more) -> final (exactly 1) -> close - OR message* (0 or more) -> error (exactly 1) -> close - + State machine ensures: - No events after final/error - Only one final OR one error event diff --git a/ai-service/app/core/__init__.py b/ai-service/app/core/__init__.py index dee8983..0998e98 100644 --- a/ai-service/app/core/__init__.py +++ b/ai-service/app/core/__init__.py @@ -4,7 +4,7 @@ Core module - Configuration, dependencies, and utilities. 
""" from app.core.config import Settings, get_settings -from app.core.database import async_session_maker, get_session, init_db, close_db +from app.core.database import async_session_maker, close_db, get_session, init_db from app.core.qdrant_client import QdrantClient, get_qdrant_client __all__ = [ diff --git a/ai-service/app/core/config.py b/ai-service/app/core/config.py index 6fcadec..393cbff 100644 --- a/ai-service/app/core/config.py +++ b/ai-service/app/core/config.py @@ -47,7 +47,7 @@ class Settings(BaseSettings): rag_score_threshold: float = 0.01 rag_min_hits: int = 1 rag_max_evidence_tokens: int = 2000 - + rag_two_stage_enabled: bool = True rag_two_stage_expand_factor: int = 10 rag_hybrid_enabled: bool = True @@ -60,6 +60,11 @@ class Settings(BaseSettings): confidence_insufficient_penalty: float = 0.3 max_history_tokens: int = 4000 + redis_url: str = "redis://localhost:6379/0" + redis_enabled: bool = True + dashboard_cache_ttl: int = 60 + stats_counter_ttl: int = 7776000 + @lru_cache def get_settings() -> Settings: diff --git a/ai-service/app/core/database.py b/ai-service/app/core/database.py index b15cde1..a7afdb6 100644 --- a/ai-service/app/core/database.py +++ b/ai-service/app/core/database.py @@ -4,13 +4,13 @@ Database client for AI Service. 
""" import logging -from typing import AsyncGenerator +from collections.abc import AsyncGenerator from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine -from sqlalchemy.pool import NullPool from sqlmodel import SQLModel from app.core.config import get_settings +from app.models import entities # noqa: F401 - Import to register all SQLModel tables logger = logging.getLogger(__name__) diff --git a/ai-service/app/core/exceptions.py b/ai-service/app/core/exceptions.py index dfd3262..86c5f4a 100644 --- a/ai-service/app/core/exceptions.py +++ b/ai-service/app/core/exceptions.py @@ -90,6 +90,9 @@ async def http_exception_handler(request: Request, exc: HTTPException) -> JSONRe async def generic_exception_handler(request: Request, exc: Exception) -> JSONResponse: + import logging + logger = logging.getLogger(__name__) + logger.error(f"Unhandled exception: {type(exc).__name__}: {exc}", exc_info=True) return JSONResponse( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, content=ErrorResponse( diff --git a/ai-service/app/core/middleware.py b/ai-service/app/core/middleware.py index e1db0c3..ac5b154 100644 --- a/ai-service/app/core/middleware.py +++ b/ai-service/app/core/middleware.py @@ -5,13 +5,13 @@ Middleware for AI Service. import logging import re -from typing import Callable +from collections.abc import Callable from fastapi import Request, Response, status from fastapi.responses import JSONResponse from starlette.middleware.base import BaseHTTPMiddleware -from app.core.exceptions import ErrorCode, ErrorResponse, MissingTenantIdException +from app.core.exceptions import ErrorCode, ErrorResponse from app.core.tenant import clear_tenant_context, set_tenant_context logger = logging.getLogger(__name__) @@ -52,7 +52,7 @@ def parse_tenant_id(tenant_id: str) -> tuple[str, str]: class ApiKeyMiddleware(BaseHTTPMiddleware): """ [AC-AISVC-50] Middleware to validate API Key for all requests. 
- + Features: - Validates X-API-Key header against in-memory cache - Skips validation for health/docs endpoints @@ -80,6 +80,16 @@ class ApiKeyMiddleware(BaseHTTPMiddleware): from app.services.api_key import get_api_key_service service = get_api_key_service() + if not service._initialized: + logger.warning("[AC-AISVC-50] API key service not initialized, attempting lazy initialization...") + try: + from app.core.database import async_session_maker + async with async_session_maker() as session: + await service.initialize(session) + logger.info(f"[AC-AISVC-50] API key service lazy initialized with {len(service._keys_cache)} keys") + except Exception as e: + logger.error(f"[AC-AISVC-50] Failed to initialize API key service: {e}") + if not service.validate_key(api_key): logger.warning(f"[AC-AISVC-50] Invalid API key for {request.url.path}") return JSONResponse( @@ -148,10 +158,16 @@ class TenantContextMiddleware(BaseHTTPMiddleware): set_tenant_context(tenant_id) request.state.tenant_id = tenant_id - logger.info(f"[AC-AISVC-10] Tenant context set: tenant_id={tenant_id}") + logger.info(f"[AC-AISVC-10] Tenant context set: tenant_id={tenant_id}, path={request.url.path}") try: + logger.info(f"[MIDDLEWARE] Calling next handler for path={request.url.path}") response = await call_next(request) + logger.info(f"[MIDDLEWARE] Response received for path={request.url.path}, status={response.status_code}") + except Exception as e: + import traceback + logger.error(f"[MIDDLEWARE] Exception in call_next for path={request.url.path}: {type(e).__name__}: {e}\n{traceback.format_exc()}") + raise finally: clear_tenant_context() @@ -162,7 +178,6 @@ class TenantContextMiddleware(BaseHTTPMiddleware): [AC-AISVC-10] Ensure tenant exists in database, create if not. 
""" from sqlalchemy import select - from sqlalchemy.ext.asyncio import AsyncSession from app.core.database import async_session_maker from app.models.entities import Tenant diff --git a/ai-service/app/core/prompts.py b/ai-service/app/core/prompts.py index 91198b8..1c9a427 100644 --- a/ai-service/app/core/prompts.py +++ b/ai-service/app/core/prompts.py @@ -15,26 +15,26 @@ SYSTEM_PROMPT = """你是一名经验丰富的客服专员,名字叫"小N"。 def format_evidence_for_prompt( - retrieval_results: list, - max_results: int = 5, + retrieval_results: list, + max_results: int = 5, max_content_length: int = 500 ) -> str: """ Format retrieval results as evidence text for prompts. - + Args: retrieval_results: List of retrieval hits. Can be: - dict format: {'content', 'score', 'source', 'metadata'} - RetrievalHit object: with .text, .score, .source, .metadata attributes max_results: Maximum number of results to include max_content_length: Maximum length of each content snippet - + Returns: Formatted evidence text """ if not retrieval_results: return "" - + evidence_parts = [] for i, hit in enumerate(retrieval_results[:max_results]): if hasattr(hit, 'text'): @@ -47,15 +47,15 @@ def format_evidence_for_prompt( score = hit.get('score', 0) source = hit.get('source', '知识库') metadata = hit.get('metadata', {}) or {} - + if len(content) > max_content_length: content = content[:max_content_length] + '...' 
- + nested_meta = metadata.get('metadata', {}) source_doc = nested_meta.get('source_doc', source) if nested_meta else source category = nested_meta.get('category', '') if nested_meta else '' department = nested_meta.get('department', '') if nested_meta else '' - + header = f"[文档{i+1}]" if source_doc and source_doc != "知识库": header += f" 来源:{source_doc}" @@ -63,25 +63,25 @@ def format_evidence_for_prompt( header += f" | 类别:{category}" if department: header += f" | 部门:{department}" - + evidence_parts.append(f"{header}\n相关度:{score:.2f}\n内容:{content}") - + return "\n\n".join(evidence_parts) def build_system_prompt_with_evidence(evidence_text: str) -> str: """ Build system prompt with knowledge base evidence. - + Args: evidence_text: Formatted evidence from retrieval results - + Returns: Complete system prompt """ if not evidence_text: return SYSTEM_PROMPT - + return f"""{SYSTEM_PROMPT} 知识库参考内容: @@ -91,11 +91,11 @@ def build_system_prompt_with_evidence(evidence_text: str) -> str: def build_user_prompt_with_evidence(query: str, evidence_text: str) -> str: """ Build user prompt with knowledge base evidence (for single-message format). 
- + Args: query: User's question evidence_text: Formatted evidence from retrieval results - + Returns: Complete user prompt """ @@ -103,7 +103,7 @@ def build_user_prompt_with_evidence(query: str, evidence_text: str) -> str: return f"""用户问题:{query} 未找到相关检索结果,请基于通用知识回答用户问题。""" - + return f"""【系统指令】 {SYSTEM_PROMPT} diff --git a/ai-service/app/core/qdrant_client.py b/ai-service/app/core/qdrant_client.py index 19b47f4..e830411 100644 --- a/ai-service/app/core/qdrant_client.py +++ b/ai-service/app/core/qdrant_client.py @@ -8,7 +8,7 @@ import logging from typing import Any from qdrant_client import AsyncQdrantClient -from qdrant_client.models import Distance, PointStruct, VectorParams, QueryRequest +from qdrant_client.models import Distance, PointStruct, VectorParams from app.core.config import get_settings @@ -19,8 +19,12 @@ settings = get_settings() class QdrantClient: """ - [AC-AISVC-10] Qdrant client with tenant-isolated collection management. - Collection naming: kb_{tenantId} for tenant isolation. + [AC-AISVC-10, AC-AISVC-59] Qdrant client with tenant-isolated collection management. + + Collection naming conventions: + - Legacy (single KB): kb_{tenantId} + - Multi-KB: kb_{tenantId}_{kbId} + Supports multi-dimensional vectors (256/512/768) for Matryoshka retrieval. """ @@ -45,16 +49,42 @@ class QdrantClient: def get_collection_name(self, tenant_id: str) -> str: """ - [AC-AISVC-10] Get collection name for a tenant. + [AC-AISVC-10] Get legacy collection name for a tenant. Naming convention: kb_{tenantId} Replaces @ with _ to ensure valid collection names. + + Note: This is kept for backward compatibility. + For multi-KB, use get_kb_collection_name() instead. """ safe_tenant_id = tenant_id.replace('@', '_') return f"{self._collection_prefix}{safe_tenant_id}" + def get_kb_collection_name(self, tenant_id: str, kb_id: str | None = None) -> str: + """ + [AC-AISVC-59, AC-AISVC-63] Get collection name for a specific knowledge base. 
+ + Naming convention: + - If kb_id is None or "default": kb_{tenantId} (legacy format for backward compatibility) + - Otherwise: kb_{tenantId}_{kbId} + + Args: + tenant_id: Tenant identifier + kb_id: Knowledge base ID (optional, defaults to legacy naming) + + Returns: + Collection name for the knowledge base + """ + safe_tenant_id = tenant_id.replace('@', '_') + + if kb_id is None or kb_id == "default" or kb_id == "": + return f"{self._collection_prefix}{safe_tenant_id}" + + safe_kb_id = kb_id.replace('-', '_')[:8] + return f"{self._collection_prefix}{safe_tenant_id}_{safe_kb_id}" + async def ensure_collection_exists(self, tenant_id: str, use_multi_vector: bool = True) -> bool: """ - [AC-AISVC-10] Ensure collection exists for tenant. + [AC-AISVC-10] Ensure collection exists for tenant (legacy single-KB mode). Supports multi-dimensional vectors for Matryoshka retrieval. """ client = await self.get_client() @@ -84,7 +114,7 @@ class QdrantClient: size=self._vector_size, distance=Distance.COSINE, ) - + await client.create_collection( collection_name=collection_name, vectors_config=vectors_config, @@ -98,16 +128,80 @@ class QdrantClient: logger.error(f"[AC-AISVC-10] Error ensuring collection: {e}") return False + async def ensure_kb_collection_exists( + self, + tenant_id: str, + kb_id: str | None = None, + use_multi_vector: bool = True, + ) -> bool: + """ + [AC-AISVC-59] Ensure collection exists for a specific knowledge base. 
+ + Args: + tenant_id: Tenant identifier + kb_id: Knowledge base ID (optional, defaults to legacy naming) + use_multi_vector: Whether to use multi-dimensional vectors + + Returns: + True if collection exists or was created successfully + """ + client = await self.get_client() + collection_name = self.get_kb_collection_name(tenant_id, kb_id) + + try: + exists = await client.collection_exists(collection_name) + + if not exists: + if use_multi_vector: + vectors_config = { + "full": VectorParams( + size=768, + distance=Distance.COSINE, + ), + "dim_256": VectorParams( + size=256, + distance=Distance.COSINE, + ), + "dim_512": VectorParams( + size=512, + distance=Distance.COSINE, + ), + } + else: + vectors_config = VectorParams( + size=self._vector_size, + distance=Distance.COSINE, + ) + + await client.create_collection( + collection_name=collection_name, + vectors_config=vectors_config, + ) + logger.info( + f"[AC-AISVC-59] Created KB collection: {collection_name} for tenant={tenant_id}, kb_id={kb_id} " + f"with multi_vector={use_multi_vector}" + ) + return True + except Exception as e: + logger.error(f"[AC-AISVC-59] Error ensuring KB collection: {e}") + return False + async def upsert_vectors( self, tenant_id: str, points: list[PointStruct], + kb_id: str | None = None, ) -> bool: """ - [AC-AISVC-10] Upsert vectors into tenant's collection. + [AC-AISVC-10, AC-AISVC-63] Upsert vectors into tenant's collection. 
+ + Args: + tenant_id: Tenant identifier + points: List of PointStruct to upsert + kb_id: Knowledge base ID (optional, uses legacy naming if not provided) """ client = await self.get_client() - collection_name = self.get_collection_name(tenant_id) + collection_name = self.get_kb_collection_name(tenant_id, kb_id) try: await client.upsert( @@ -115,7 +209,7 @@ class QdrantClient: points=points, ) logger.info( - f"[AC-AISVC-10] Upserted {len(points)} vectors for tenant={tenant_id}" + f"[AC-AISVC-10] Upserted {len(points)} vectors for tenant={tenant_id}, kb_id={kb_id}" ) return True except Exception as e: @@ -126,10 +220,11 @@ class QdrantClient: self, tenant_id: str, points: list[dict[str, Any]], + kb_id: str | None = None, ) -> bool: """ Upsert points with multi-dimensional vectors. - + Args: tenant_id: Tenant identifier points: List of points with format: @@ -142,9 +237,10 @@ class QdrantClient: }, "payload": dict } + kb_id: Knowledge base ID (optional, uses legacy naming if not provided) """ client = await self.get_client() - collection_name = self.get_collection_name(tenant_id) + collection_name = self.get_kb_collection_name(tenant_id, kb_id) try: qdrant_points = [] @@ -155,13 +251,13 @@ class QdrantClient: payload=p.get("payload", {}), ) qdrant_points.append(point) - + await client.upsert( collection_name=collection_name, points=qdrant_points, ) logger.info( - f"[RAG-OPT] Upserted {len(points)} multi-vector points for tenant={tenant_id}" + f"[RAG-OPT] Upserted {len(points)} multi-vector points for tenant={tenant_id}, kb_id={kb_id}" ) return True except Exception as e: @@ -181,7 +277,7 @@ class QdrantClient: [AC-AISVC-10] Search vectors in tenant's collection. Returns results with score >= score_threshold if specified. Searches both old format (with @) and new format (with _) for backward compatibility. 
- + Args: tenant_id: Tenant identifier query_vector: Query vector for similarity search @@ -192,31 +288,31 @@ class QdrantClient: with_vectors: Whether to return vectors in results (for two-stage reranking) """ client = await self.get_client() - + logger.info( f"[AC-AISVC-10] Starting search: tenant_id={tenant_id}, " f"limit={limit}, score_threshold={score_threshold}, vector_dim={len(query_vector)}, vector_name={vector_name}" ) - + collection_names = [self.get_collection_name(tenant_id)] if '@' in tenant_id: old_format = f"{self._collection_prefix}{tenant_id}" new_format = f"{self._collection_prefix}{tenant_id.replace('@', '_')}" collection_names = [new_format, old_format] - + logger.info(f"[AC-AISVC-10] Will search in collections: {collection_names}") - + all_hits = [] - + for collection_name in collection_names: try: logger.info(f"[AC-AISVC-10] Searching in collection: {collection_name}") - + exists = await client.collection_exists(collection_name) if not exists: logger.warning(f"[AC-AISVC-10] Collection {collection_name} does not exist") continue - + try: results = await client.query_points( collection_name=collection_name, @@ -241,7 +337,7 @@ class QdrantClient: ) else: raise - + logger.info( f"[AC-AISVC-10] Collection {collection_name} returned {len(results.points)} raw results" ) @@ -257,7 +353,7 @@ class QdrantClient: hit["vector"] = result.vector hits.append(hit) all_hits.extend(hits) - + if hits: logger.info( f"[AC-AISVC-10] Search in collection {collection_name}: {len(hits)} results for tenant={tenant_id}" @@ -277,17 +373,17 @@ class QdrantClient: continue all_hits = sorted(all_hits, key=lambda x: x["score"], reverse=True)[:limit] - + logger.info( f"[AC-AISVC-10] Search returned {len(all_hits)} total results for tenant={tenant_id}" ) - + if len(all_hits) == 0: logger.warning( f"[AC-AISVC-10] No results found! 
tenant={tenant_id}, " f"collections_tried={collection_names}, limit={limit}" ) - + return all_hits async def delete_collection(self, tenant_id: str) -> bool: @@ -306,6 +402,132 @@ class QdrantClient: logger.error(f"[AC-AISVC-10] Error deleting collection: {e}") return False + async def delete_kb_collection(self, tenant_id: str, kb_id: str) -> bool: + """ + [AC-AISVC-62] Delete a specific knowledge base's collection. + + Args: + tenant_id: Tenant identifier + kb_id: Knowledge base ID + + Returns: + True if collection was deleted successfully + """ + client = await self.get_client() + collection_name = self.get_kb_collection_name(tenant_id, kb_id) + + try: + exists = await client.collection_exists(collection_name) + if exists: + await client.delete_collection(collection_name=collection_name) + logger.info(f"[AC-AISVC-62] Deleted KB collection: {collection_name} for kb_id={kb_id}") + else: + logger.info(f"[AC-AISVC-62] KB collection {collection_name} does not exist, nothing to delete") + return True + except Exception as e: + logger.error(f"[AC-AISVC-62] Error deleting KB collection: {e}") + return False + + async def search_kb( + self, + tenant_id: str, + query_vector: list[float], + kb_ids: list[str] | None = None, + limit: int = 5, + score_threshold: float | None = None, + vector_name: str = "full", + with_vectors: bool = False, + ) -> list[dict[str, Any]]: + """ + [AC-AISVC-64] Search vectors across multiple knowledge base collections. + + Args: + tenant_id: Tenant identifier + query_vector: Query vector for similarity search + kb_ids: List of knowledge base IDs to search. If None, searches legacy collection. 
+ limit: Maximum number of results per collection + score_threshold: Minimum score threshold for results + vector_name: Name of the vector to search + with_vectors: Whether to return vectors in results + + Returns: + Combined and sorted results from all collections + """ + client = await self.get_client() + + if kb_ids is None or len(kb_ids) == 0: + return await self.search( + tenant_id=tenant_id, + query_vector=query_vector, + limit=limit, + score_threshold=score_threshold, + vector_name=vector_name, + with_vectors=with_vectors, + ) + + logger.info( + f"[AC-AISVC-64] Starting multi-KB search: tenant_id={tenant_id}, " + f"kb_ids={kb_ids}, limit={limit}, score_threshold={score_threshold}" + ) + + all_hits = [] + + for kb_id in kb_ids: + collection_name = self.get_kb_collection_name(tenant_id, kb_id) + + try: + exists = await client.collection_exists(collection_name) + if not exists: + logger.warning(f"[AC-AISVC-64] Collection {collection_name} does not exist") + continue + + try: + results = await client.query_points( + collection_name=collection_name, + query=query_vector, + using=vector_name, + limit=limit, + with_vectors=with_vectors, + score_threshold=score_threshold, + ) + except Exception as e: + if "vector name" in str(e).lower() or "Not existing vector" in str(e) or "using" in str(e).lower(): + results = await client.query_points( + collection_name=collection_name, + query=query_vector, + limit=limit, + with_vectors=with_vectors, + score_threshold=score_threshold, + ) + else: + raise + + for result in results.points: + hit = { + "id": str(result.id), + "score": result.score, + "payload": result.payload or {}, + "kb_id": kb_id, + } + if with_vectors and result.vector: + hit["vector"] = result.vector + all_hits.append(hit) + + logger.info( + f"[AC-AISVC-64] Collection {collection_name} returned {len(results.points)} results" + ) + except Exception as e: + logger.warning(f"[AC-AISVC-64] Error searching collection {collection_name}: {e}") + continue + + all_hits 
= sorted(all_hits, key=lambda x: x["score"], reverse=True)[:limit] + + logger.info( + f"[AC-AISVC-64] Multi-KB search returned {len(all_hits)} total results" + ) + + return all_hits + _qdrant_client: QdrantClient | None = None diff --git a/ai-service/app/core/sse.py b/ai-service/app/core/sse.py index 1930323..80d2921 100644 --- a/ai-service/app/core/sse.py +++ b/ai-service/app/core/sse.py @@ -6,8 +6,9 @@ SSE utilities for AI Service. import asyncio import json import logging +from collections.abc import AsyncGenerator from enum import Enum -from typing import Any, AsyncGenerator +from typing import Any from sse_starlette.sse import EventSourceResponse, ServerSentEvent @@ -43,7 +44,7 @@ class SSEStateMachine: async with self._lock: if self._state == SSEState.INIT: self._state = SSEState.STREAMING - logger.debug(f"[AC-AISVC-07] SSE state transition: INIT -> STREAMING") + logger.debug("[AC-AISVC-07] SSE state transition: INIT -> STREAMING") return True return False @@ -51,7 +52,7 @@ class SSEStateMachine: async with self._lock: if self._state == SSEState.STREAMING: self._state = SSEState.FINAL_SENT - logger.debug(f"[AC-AISVC-08] SSE state transition: STREAMING -> FINAL_SENT") + logger.debug("[AC-AISVC-08] SSE state transition: STREAMING -> FINAL_SENT") return True return False diff --git a/ai-service/app/models/entities.py b/ai-service/app/models/entities.py index 56bf0b1..775b5e1 100644 --- a/ai-service/app/models/entities.py +++ b/ai-service/app/models/entities.py @@ -21,7 +21,6 @@ class ChatSession(SQLModel, table=True): __tablename__ = "chat_sessions" __table_args__ = ( Index("ix_chat_sessions_tenant_session", "tenant_id", "session_id", unique=True), - Index("ix_chat_sessions_tenant_id", "tenant_id"), ) id: uuid.UUID = Field(default_factory=uuid.uuid4, primary_key=True) @@ -41,12 +40,16 @@ class ChatMessage(SQLModel, table=True): """ [AC-AISVC-13] Chat message entity with tenant isolation. Messages are scoped by (tenant_id, session_id) for multi-tenant security. 
+ [v0.7.0] Extended with monitoring fields for Dashboard statistics. """ __tablename__ = "chat_messages" __table_args__ = ( Index("ix_chat_messages_tenant_session", "tenant_id", "session_id"), Index("ix_chat_messages_tenant_session_created", "tenant_id", "session_id", "created_at"), + Index("ix_chat_messages_tenant_template", "tenant_id", "prompt_template_id"), + Index("ix_chat_messages_tenant_intent", "tenant_id", "intent_rule_id"), + Index("ix_chat_messages_tenant_flow", "tenant_id", "flow_instance_id"), ) id: uuid.UUID = Field(default_factory=uuid.uuid4, primary_key=True) @@ -63,6 +66,31 @@ class ChatMessage(SQLModel, table=True): error_message: str | None = Field(default=None, description="Error message if any") created_at: datetime = Field(default_factory=datetime.utcnow, description="Message creation time") + prompt_template_id: uuid.UUID | None = Field( + default=None, + description="[v0.7.0] ID of the Prompt template used", + foreign_key="prompt_templates.id", + ) + intent_rule_id: uuid.UUID | None = Field( + default=None, + description="[v0.7.0] ID of the Intent rule that matched", + foreign_key="intent_rules.id", + ) + flow_instance_id: uuid.UUID | None = Field( + default=None, + description="[v0.7.0] ID of the Flow instance if flow was active", + foreign_key="flow_instances.id", + ) + guardrail_triggered: bool = Field( + default=False, + description="[v0.7.0] Whether output guardrail was triggered" + ) + guardrail_words: dict[str, Any] | None = Field( + default=None, + sa_column=Column("guardrail_words", JSON, nullable=True), + description="[v0.7.0] Guardrail trigger details: words, categories, strategy" + ) + class ChatSessionCreate(SQLModel): """Schema for creating a new chat session.""" @@ -134,7 +162,6 @@ class KnowledgeBase(SQLModel, table=True): __tablename__ = "knowledge_bases" __table_args__ = ( - Index("ix_knowledge_bases_tenant_id", "tenant_id"), Index("ix_knowledge_bases_tenant_kb_type", "tenant_id", "kb_type"), ) @@ -267,7 +294,6 @@ class 
PromptTemplate(SQLModel, table=True): __tablename__ = "prompt_templates" __table_args__ = ( Index("ix_prompt_templates_tenant_scene", "tenant_id", "scene"), - Index("ix_prompt_templates_tenant_id", "tenant_id"), ) id: uuid.UUID = Field(default_factory=uuid.uuid4, primary_key=True) @@ -354,7 +380,6 @@ class IntentRule(SQLModel, table=True): __tablename__ = "intent_rules" __table_args__ = ( Index("ix_intent_rules_tenant_enabled_priority", "tenant_id", "is_enabled"), - Index("ix_intent_rules_tenant_id", "tenant_id"), ) id: uuid.UUID = Field(default_factory=uuid.uuid4, primary_key=True) @@ -465,7 +490,6 @@ class ForbiddenWord(SQLModel, table=True): __tablename__ = "forbidden_words" __table_args__ = ( Index("ix_forbidden_words_tenant_enabled", "tenant_id", "is_enabled"), - Index("ix_forbidden_words_tenant_id", "tenant_id"), ) id: uuid.UUID = Field(default_factory=uuid.uuid4, primary_key=True) @@ -519,7 +543,6 @@ class BehaviorRule(SQLModel, table=True): __tablename__ = "behavior_rules" __table_args__ = ( Index("ix_behavior_rules_tenant_enabled", "tenant_id", "is_enabled"), - Index("ix_behavior_rules_tenant_id", "tenant_id"), ) id: uuid.UUID = Field(default_factory=uuid.uuid4, primary_key=True) @@ -624,7 +647,6 @@ class ScriptFlow(SQLModel, table=True): __tablename__ = "script_flows" __table_args__ = ( - Index("ix_script_flows_tenant_id", "tenant_id"), Index("ix_script_flows_tenant_enabled", "tenant_id", "is_enabled"), ) @@ -746,3 +768,121 @@ class FlowAdvanceResult: if self.timeout_action is not None: result["timeout_action"] = self.timeout_action return result + + +class FlowTestRecordStatus(str, Enum): + """[AC-AISVC-93] Flow test record status.""" + SUCCESS = "success" + PARTIAL = "partial" + FAILED = "failed" + + +class FlowTestRecord(SQLModel, table=True): + """ + [AC-AISVC-93] Flow test record entity for complete 12-step execution logging. + Records are retained for 7 days (TTL cleanup via background task). 
+ """ + + __tablename__ = "flow_test_records" + __table_args__ = ( + Index("ix_flow_test_records_tenant_created", "tenant_id", "created_at"), + Index("ix_flow_test_records_session", "session_id"), + ) + + id: uuid.UUID = Field(default_factory=uuid.uuid4, primary_key=True) + tenant_id: str = Field(..., description="Tenant ID for multi-tenant isolation", index=True) + session_id: str = Field(..., description="Session ID for test session") + status: str = Field( + default=FlowTestRecordStatus.SUCCESS.value, + description="Overall status: success/partial/failed" + ) + steps: list[dict[str, Any]] = Field( + default=[], + sa_column=Column("steps", JSON, nullable=False), + description="12-step execution logs with step, name, status, duration_ms, input, output, error, metadata" + ) + final_response: dict[str, Any] | None = Field( + default=None, + sa_column=Column("final_response", JSON, nullable=True), + description="Final ChatResponse with reply, confidence, should_transfer" + ) + total_duration_ms: int | None = Field(default=None, description="Total execution time in milliseconds") + created_at: datetime = Field(default_factory=datetime.utcnow, description="Record creation time", index=True) + + +class FlowTestStepResult(SQLModel): + """[AC-AISVC-93] Schema for a single step result in flow test.""" + + step: int = Field(..., description="Step number (1-12)") + name: str = Field(..., description="Step name") + status: str = Field(..., description="Execution status: success/failed/skipped") + duration_ms: int | None = Field(default=None, description="Execution time in milliseconds") + input: dict[str, Any] | None = Field(default=None, description="Step input data") + output: dict[str, Any] | None = Field(default=None, description="Step output data") + error: str | None = Field(default=None, description="Error message if failed") + step_metadata: dict[str, Any] | None = Field(default=None, description="Step metadata (matched rule, template, etc.)") + + +class 
ExportTaskStatus(str, Enum): + """[AC-AISVC-110] Export task status.""" + PROCESSING = "processing" + COMPLETED = "completed" + FAILED = "failed" + + +class ExportTask(SQLModel, table=True): + """ + [AC-AISVC-110] Export task entity for conversation export. + Supports async export with file download. + """ + + __tablename__ = "export_tasks" + __table_args__ = ( + Index("ix_export_tasks_tenant_status", "tenant_id", "status"), + Index("ix_export_tasks_tenant_created", "tenant_id", "created_at"), + ) + + id: uuid.UUID = Field(default_factory=uuid.uuid4, primary_key=True) + tenant_id: str = Field(..., description="Tenant ID for multi-tenant isolation", index=True) + status: str = Field( + default=ExportTaskStatus.PROCESSING.value, + description="Task status: processing/completed/failed" + ) + file_path: str | None = Field(default=None, description="Path to exported file") + file_name: str | None = Field(default=None, description="Generated file name") + file_size: int | None = Field(default=None, description="File size in bytes") + total_rows: int | None = Field(default=None, description="Total rows exported") + format: str = Field(default="json", description="Export format: json/csv") + filters: dict[str, Any] | None = Field( + default=None, + sa_column=Column("filters", JSON, nullable=True), + description="Export filters applied" + ) + error_message: str | None = Field(default=None, description="Error message if failed") + expires_at: datetime | None = Field(default=None, description="File expiration time (for cleanup)") + created_at: datetime = Field(default_factory=datetime.utcnow, description="Task creation time") + completed_at: datetime | None = Field(default=None, description="Completion time") + + +class ExportTaskCreate(SQLModel): + """[AC-AISVC-110] Schema for creating an export task.""" + + format: str = "json" + filters: dict[str, Any] | None = None + + +class ConversationDetail(SQLModel): + """[AC-AISVC-109] Schema for conversation detail with execution 
chain.""" + + conversation_id: uuid.UUID + session_id: str + user_message: str + ai_reply: str | None = None + triggered_rules: list[dict[str, Any]] | None = None + used_template: dict[str, Any] | None = None + used_flow: dict[str, Any] | None = None + execution_time_ms: int | None = None + confidence: float | None = None + should_transfer: bool = False + execution_steps: list[dict[str, Any]] | None = None + created_at: datetime diff --git a/ai-service/app/services/api_key.py b/ai-service/app/services/api_key.py index a4c14a6..51edcdd 100644 --- a/ai-service/app/services/api_key.py +++ b/ai-service/app/services/api_key.py @@ -19,7 +19,7 @@ logger = logging.getLogger(__name__) class ApiKeyService: """ [AC-AISVC-50] API Key management service. - + Features: - In-memory cache for fast validation - Database persistence @@ -39,49 +39,49 @@ class ApiKeyService: select(ApiKey).where(ApiKey.is_active == True) ) keys = result.scalars().all() - + self._keys_cache = {key.key for key in keys} self._initialized = True - + logger.info(f"[AC-AISVC-50] Loaded {len(self._keys_cache)} API keys into memory") def validate_key(self, key: str) -> bool: """ Validate an API key against the in-memory cache. - + Args: key: The API key to validate - + Returns: True if the key is valid, False otherwise """ if not self._initialized: logger.warning("[AC-AISVC-50] API key service not initialized") return False - + return key in self._keys_cache def generate_key(self) -> str: """ Generate a new secure API key. - + Returns: A URL-safe random string """ return secrets.token_urlsafe(32) async def create_key( - self, - session: AsyncSession, + self, + session: AsyncSession, key_create: ApiKeyCreate ) -> ApiKey: """ Create a new API key. 
- + Args: session: Database session key_create: Key creation data - + Returns: The created ApiKey entity """ @@ -90,139 +90,139 @@ class ApiKeyService: name=key_create.name, is_active=key_create.is_active, ) - + session.add(api_key) await session.commit() await session.refresh(api_key) - + if api_key.is_active: self._keys_cache.add(api_key.key) - + logger.info(f"[AC-AISVC-50] Created API key: {api_key.name}") return api_key async def create_default_key(self, session: AsyncSession) -> Optional[ApiKey]: """ Create a default API key if none exists. - + Returns: The created ApiKey or None if keys already exist """ result = await session.execute(select(ApiKey).limit(1)) existing = result.scalar_one_or_none() - + if existing: return None - + default_key = secrets.token_urlsafe(32) api_key = ApiKey( key=default_key, name="Default API Key", is_active=True, ) - + session.add(api_key) await session.commit() await session.refresh(api_key) - + self._keys_cache.add(api_key.key) - + logger.info(f"[AC-AISVC-50] Created default API key: {api_key.key}") return api_key async def delete_key( - self, - session: AsyncSession, + self, + session: AsyncSession, key_id: str ) -> bool: """ Delete an API key. - + Args: session: Database session key_id: The key ID to delete - + Returns: True if deleted, False if not found """ import uuid - + try: key_uuid = uuid.UUID(key_id) except ValueError: return False - + result = await session.execute( select(ApiKey).where(ApiKey.id == key_uuid) ) api_key = result.scalar_one_or_none() - + if not api_key: return False - + key_value = api_key.key await session.delete(api_key) await session.commit() - + self._keys_cache.discard(key_value) - + logger.info(f"[AC-AISVC-50] Deleted API key: {api_key.name}") return True async def toggle_key( - self, - session: AsyncSession, + self, + session: AsyncSession, key_id: str, is_active: bool ) -> Optional[ApiKey]: """ Toggle API key active status. 
- + Args: session: Database session key_id: The key ID to toggle is_active: New active status - + Returns: The updated ApiKey or None if not found """ import uuid - + try: key_uuid = uuid.UUID(key_id) except ValueError: return None - + result = await session.execute( select(ApiKey).where(ApiKey.id == key_uuid) ) api_key = result.scalar_one_or_none() - + if not api_key: return None - + api_key.is_active = is_active api_key.updated_at = datetime.utcnow() - + session.add(api_key) await session.commit() await session.refresh(api_key) - + if is_active: self._keys_cache.add(api_key.key) else: self._keys_cache.discard(api_key.key) - + logger.info(f"[AC-AISVC-50] Toggled API key {api_key.name}: active={is_active}") return api_key async def list_keys(self, session: AsyncSession) -> list[ApiKey]: """ List all API keys. - + Args: session: Database session - + Returns: List of all ApiKey entities """ diff --git a/ai-service/app/services/context.py b/ai-service/app/services/context.py index 598a6c4..1aa2fdd 100644 --- a/ai-service/app/services/context.py +++ b/ai-service/app/services/context.py @@ -17,7 +17,7 @@ from typing import Any import tiktoken from app.core.config import get_settings -from app.models import ChatMessage, Role +from app.models import ChatMessage logger = logging.getLogger(__name__) diff --git a/ai-service/app/services/document/base.py b/ai-service/app/services/document/base.py index 4eee6e8..18a4f83 100644 --- a/ai-service/app/services/document/base.py +++ b/ai-service/app/services/document/base.py @@ -89,7 +89,7 @@ class DocumentParser(ABC): class DocumentParseException(Exception): """Exception raised when document parsing fails.""" - + def __init__( self, message: str, @@ -105,7 +105,7 @@ class DocumentParseException(Exception): class UnsupportedFormatError(DocumentParseException): """Exception raised when file format is not supported.""" - + def __init__(self, extension: str, supported: list[str]): super().__init__( f"Unsupported file format: 
{extension}. " diff --git a/ai-service/app/services/document/excel_parser.py b/ai-service/app/services/document/excel_parser.py index 449e7c2..6195fe8 100644 --- a/ai-service/app/services/document/excel_parser.py +++ b/ai-service/app/services/document/excel_parser.py @@ -58,23 +58,23 @@ class ExcelParser(DocumentParser): """ records = [] rows = list(sheet.iter_rows(max_row=self._max_rows_per_sheet, values_only=True)) - + if not rows: return records - + headers = rows[0] header_list = [str(h) if h is not None else f"column_{i}" for i, h in enumerate(headers)] - + for row in rows[1:]: record = {"_sheet": sheet_name} has_content = False - + for i, value in enumerate(row): if i < len(header_list): key = header_list[i] else: key = f"column_{i}" - + if value is not None: has_content = True if isinstance(value, (int, float, bool)): @@ -83,10 +83,10 @@ class ExcelParser(DocumentParser): record[key] = str(value) elif self._include_empty_cells: record[key] = None - + if has_content or self._include_empty_cells: records.append(record) - + return records def parse(self, file_path: str | Path) -> ParseResult: @@ -95,46 +95,46 @@ class ExcelParser(DocumentParser): [AC-AISVC-35] Converts spreadsheet data to JSON format. 
""" path = Path(file_path) - + if not path.exists(): raise DocumentParseException( f"File not found: {path}", file_path=str(path), parser="excel" ) - + if not self.supports_extension(path.suffix): raise DocumentParseException( f"Unsupported file extension: {path.suffix}", file_path=str(path), parser="excel" ) - + openpyxl = self._get_openpyxl() - + try: workbook = openpyxl.load_workbook(path, read_only=True, data_only=True) - + all_records: list[dict[str, Any]] = [] sheet_count = len(workbook.sheetnames) total_rows = 0 - + for sheet_name in workbook.sheetnames: sheet = workbook[sheet_name] records = self._sheet_to_records(sheet, sheet_name) all_records.extend(records) total_rows += len(records) - + workbook.close() - + json_str = json.dumps(all_records, ensure_ascii=False, indent=2) file_size = path.stat().st_size - + logger.info( f"Parsed Excel (JSON): {path.name}, sheets={sheet_count}, " f"rows={total_rows}, chars={len(json_str)}, size={file_size}" ) - + return ParseResult( text=json_str, source_path=str(path), @@ -146,7 +146,7 @@ class ExcelParser(DocumentParser): "total_rows": total_rows, } ) - + except DocumentParseException: raise except Exception as e: @@ -177,36 +177,36 @@ class CSVParser(DocumentParser): def _parse_csv_to_records(self, path: Path, encoding: str) -> list[dict[str, Any]]: """Parse CSV file and return list of record dictionaries.""" import csv - + records = [] - - with open(path, "r", encoding=encoding, newline="") as f: + + with open(path, encoding=encoding, newline="") as f: reader = csv.reader(f, delimiter=self._delimiter) rows = list(reader) - + if not rows: return records - + headers = rows[0] header_list = [str(h) if h else f"column_{i}" for i, h in enumerate(headers)] - + for row in rows[1:]: record = {} has_content = False - + for i, value in enumerate(row): if i < len(header_list): key = header_list[i] else: key = f"column_{i}" - + if value: has_content = True record[key] = value - + if has_content: records.append(record) - + return 
records def parse(self, file_path: str | Path) -> ParseResult: @@ -215,14 +215,14 @@ class CSVParser(DocumentParser): [AC-AISVC-35] Converts CSV data to JSON format. """ path = Path(file_path) - + if not path.exists(): raise DocumentParseException( f"File not found: {path}", file_path=str(path), parser="csv" ) - + try: records = self._parse_csv_to_records(path, self._encoding) row_count = len(records) @@ -246,15 +246,15 @@ class CSVParser(DocumentParser): parser="csv", details={"error": str(e)} ) - + json_str = json.dumps(records, ensure_ascii=False, indent=2) file_size = path.stat().st_size - + logger.info( f"Parsed CSV (JSON): {path.name}, rows={row_count}, " f"chars={len(json_str)}, size={file_size}" ) - + return ParseResult( text=json_str, source_path=str(path), diff --git a/ai-service/app/services/document/factory.py b/ai-service/app/services/document/factory.py index 74d4b2b..e19de83 100644 --- a/ai-service/app/services/document/factory.py +++ b/ai-service/app/services/document/factory.py @@ -7,11 +7,11 @@ Design reference: progress.md Section 7.2 - DocumentParserFactory import logging from pathlib import Path -from typing import Any, Type +from typing import Any from app.services.document.base import ( - DocumentParser, DocumentParseException, + DocumentParser, ParseResult, UnsupportedFormatError, ) @@ -29,7 +29,7 @@ class DocumentParserFactory: [AC-AISVC-33, AC-AISVC-34, AC-AISVC-35] Auto-selects parser based on file extension. 
""" - _parsers: dict[str, Type[DocumentParser]] = {} + _parsers: dict[str, type[DocumentParser]] = {} _extension_map: dict[str, str] = {} @classmethod @@ -37,7 +37,7 @@ class DocumentParserFactory: """Initialize default parsers.""" if cls._parsers: return - + cls._parsers = { "pdf": PDFParser, "pdfplumber": PDFPlumberParser, @@ -46,7 +46,7 @@ class DocumentParserFactory: "csv": CSVParser, "text": TextParser, } - + cls._extension_map = { ".pdf": "pdf", ".docx": "word", @@ -68,7 +68,7 @@ class DocumentParserFactory: def register_parser( cls, name: str, - parser_class: Type[DocumentParser], + parser_class: type[DocumentParser], extensions: list[str], ) -> None: """ @@ -97,17 +97,17 @@ class DocumentParserFactory: [AC-AISVC-33] Creates appropriate parser based on extension. """ cls._initialize() - + normalized = extension.lower() if not normalized.startswith("."): normalized = f".{normalized}" - + if normalized not in cls._extension_map: raise UnsupportedFormatError(normalized, cls.get_supported_extensions()) - + parser_name = cls._extension_map[normalized] parser_class = cls._parsers[parser_name] - + return parser_class() @classmethod @@ -120,24 +120,24 @@ class DocumentParserFactory: """ Parse a document file. [AC-AISVC-33, AC-AISVC-34, AC-AISVC-35] Main entry point for parsing. 
- + Args: file_path: Path to the document file parser_name: Optional specific parser to use parser_config: Optional configuration for the parser - + Returns: ParseResult with extracted text and metadata - + Raises: UnsupportedFormatError: If file format is not supported DocumentParseException: If parsing fails """ cls._initialize() - + path = Path(file_path) extension = path.suffix.lower() - + if parser_name: if parser_name not in cls._parsers: raise DocumentParseException( @@ -151,7 +151,7 @@ class DocumentParserFactory: parser = cls.get_parser_for_extension(extension) if parser_config: parser = type(parser)(**parser_config) - + return parser.parse(path) @classmethod @@ -161,12 +161,12 @@ class DocumentParserFactory: [AC-AISVC-37] Returns parser metadata. """ cls._initialize() - + info = [] for name, parser_class in cls._parsers.items(): temp_instance = parser_class.__new__(parser_class) extensions = temp_instance.get_supported_extensions() - + display_names = { "pdf": "PDF 文档", "pdfplumber": "PDF 文档 (pdfplumber)", @@ -175,7 +175,7 @@ class DocumentParserFactory: "csv": "CSV 文件", "text": "文本文件", } - + descriptions = { "pdf": "使用 PyMuPDF 解析 PDF 文档,速度快", "pdfplumber": "使用 pdfplumber 解析 PDF 文档,表格提取效果更好", @@ -184,14 +184,14 @@ class DocumentParserFactory: "csv": "解析 CSV 文件,自动检测编码", "text": "解析纯文本文件,支持多种编码", } - + info.append({ "name": name, "display_name": display_names.get(name, name), "description": descriptions.get(name, ""), "extensions": extensions, }) - + return info diff --git a/ai-service/app/services/document/pdf_parser.py b/ai-service/app/services/document/pdf_parser.py index 7b160f3..761072e 100644 --- a/ai-service/app/services/document/pdf_parser.py +++ b/ai-service/app/services/document/pdf_parser.py @@ -49,47 +49,47 @@ class PDFParser(DocumentParser): [AC-AISVC-33] Extracts text from all pages. 
""" path = Path(file_path) - + if not path.exists(): raise DocumentParseException( f"File not found: {path}", file_path=str(path), parser="pdf" ) - + if not self.supports_extension(path.suffix): raise DocumentParseException( f"Unsupported file extension: {path.suffix}", file_path=str(path), parser="pdf" ) - + fitz = self._get_fitz() - + try: doc = fitz.open(path) - + pages: list[PageText] = [] text_parts = [] page_count = len(doc) - + for page_num in range(page_count): page = doc[page_num] text = page.get_text().strip() if text: pages.append(PageText(page=page_num + 1, text=text)) text_parts.append(f"[Page {page_num + 1}]\n{text}") - + doc.close() - + full_text = "\n\n".join(text_parts) file_size = path.stat().st_size - + logger.info( f"Parsed PDF: {path.name}, pages={page_count}, " f"chars={len(full_text)}, size={file_size}" ) - + return ParseResult( text=full_text, source_path=str(path), @@ -101,7 +101,7 @@ class PDFParser(DocumentParser): }, pages=pages, ) - + except DocumentParseException: raise except Exception as e: @@ -121,7 +121,7 @@ class PDFPlumberParser(DocumentParser): """ Alternative PDF parser using pdfplumber. [AC-AISVC-33] Uses pdfplumber for text extraction. - + pdfplumber is better for table extraction but slower than PyMuPDF. """ @@ -149,46 +149,46 @@ class PDFPlumberParser(DocumentParser): [AC-AISVC-33] Extracts text and optionally tables. 
""" path = Path(file_path) - + if not path.exists(): raise DocumentParseException( f"File not found: {path}", file_path=str(path), parser="pdfplumber" ) - + pdfplumber = self._get_pdfplumber() - + try: pages: list[PageText] = [] text_parts = [] page_count = 0 - + with pdfplumber.open(path) as pdf: page_count = len(pdf.pages) - + for page_num, page in enumerate(pdf.pages): text = page.extract_text() or "" - + if self._extract_tables: tables = page.extract_tables() for table in tables: table_text = self._format_table(table) text += f"\n\n{table_text}" - + text = text.strip() if text: pages.append(PageText(page=page_num + 1, text=text)) text_parts.append(f"[Page {page_num + 1}]\n{text}") - + full_text = "\n\n".join(text_parts) file_size = path.stat().st_size - + logger.info( f"Parsed PDF (pdfplumber): {path.name}, pages={page_count}, " f"chars={len(full_text)}, size={file_size}" ) - + return ParseResult( text=full_text, source_path=str(path), @@ -201,7 +201,7 @@ class PDFPlumberParser(DocumentParser): }, pages=pages, ) - + except DocumentParseException: raise except Exception as e: @@ -216,12 +216,12 @@ class PDFPlumberParser(DocumentParser): """Format a table as text.""" if not table: return "" - + lines = [] for row in table: cells = [str(cell) if cell else "" for cell in row] lines.append(" | ".join(cells)) - + return "\n".join(lines) def get_supported_extensions(self) -> list[str]: diff --git a/ai-service/app/services/document/text_parser.py b/ai-service/app/services/document/text_parser.py index 551b712..eb9088e 100644 --- a/ai-service/app/services/document/text_parser.py +++ b/ai-service/app/services/document/text_parser.py @@ -35,15 +35,15 @@ class TextParser(DocumentParser): """ for enc in ENCODINGS_TO_TRY: try: - with open(path, "r", encoding=enc) as f: + with open(path, encoding=enc) as f: text = f.read() logger.info(f"Successfully parsed with encoding: {enc}") return text, enc except (UnicodeDecodeError, LookupError): continue - + raise 
DocumentParseException( - f"Failed to decode file with any known encoding", + "Failed to decode file with any known encoding", file_path=str(path), parser="text" ) @@ -54,25 +54,25 @@ class TextParser(DocumentParser): [AC-AISVC-33] Direct file reading. """ path = Path(file_path) - + if not path.exists(): raise DocumentParseException( f"File not found: {path}", file_path=str(path), parser="text" ) - + try: text, encoding_used = self._try_encodings(path) - + file_size = path.stat().st_size line_count = text.count("\n") + 1 - + logger.info( f"Parsed text: {path.name}, lines={line_count}, " f"chars={len(text)}, size={file_size}, encoding={encoding_used}" ) - + return ParseResult( text=text, source_path=str(path), @@ -83,7 +83,7 @@ class TextParser(DocumentParser): "encoding": encoding_used, } ) - + except DocumentParseException: raise except Exception as e: diff --git a/ai-service/app/services/document/word_parser.py b/ai-service/app/services/document/word_parser.py index c40e036..ae02277 100644 --- a/ai-service/app/services/document/word_parser.py +++ b/ai-service/app/services/document/word_parser.py @@ -49,28 +49,28 @@ class WordParser(DocumentParser): [AC-AISVC-34] Extracts text while preserving paragraph structure. 
""" path = Path(file_path) - + if not path.exists(): raise DocumentParseException( f"File not found: {path}", file_path=str(path), parser="word" ) - + if not self.supports_extension(path.suffix): raise DocumentParseException( f"Unsupported file extension: {path.suffix}", file_path=str(path), parser="word" ) - + Document = self._get_docx() - + try: doc = Document(path) - + text_parts = [] - + if self._include_headers: for section in doc.sections: header = section.header @@ -78,7 +78,7 @@ class WordParser(DocumentParser): header_text = "\n".join(p.text for p in header.paragraphs if p.text.strip()) if header_text: text_parts.append(f"[Header]\n{header_text}") - + for para in doc.paragraphs: if para.text.strip(): style_name = para.style.name if para.style else "" @@ -86,12 +86,12 @@ class WordParser(DocumentParser): text_parts.append(f"\n## {para.text}") else: text_parts.append(para.text) - + for table in doc.tables: table_text = self._format_table(table) if table_text.strip(): text_parts.append(f"\n[Table]\n{table_text}") - + if self._include_footers: for section in doc.sections: footer = section.footer @@ -99,18 +99,18 @@ class WordParser(DocumentParser): footer_text = "\n".join(p.text for p in footer.paragraphs if p.text.strip()) if footer_text: text_parts.append(f"[Footer]\n{footer_text}") - + full_text = "\n\n".join(text_parts) file_size = path.stat().st_size - + paragraph_count = len(doc.paragraphs) table_count = len(doc.tables) - + logger.info( f"Parsed Word: {path.name}, paragraphs={paragraph_count}, " f"tables={table_count}, chars={len(full_text)}, size={file_size}" ) - + return ParseResult( text=full_text, source_path=str(path), @@ -121,7 +121,7 @@ class WordParser(DocumentParser): "table_count": table_count, } ) - + except DocumentParseException: raise except Exception as e: diff --git a/ai-service/app/services/embedding/__init__.py b/ai-service/app/services/embedding/__init__.py index 8fe5844..e8e7a90 100644 --- 
a/ai-service/app/services/embedding/__init__.py +++ b/ai-service/app/services/embedding/__init__.py @@ -15,13 +15,13 @@ from app.services.embedding.factory import ( get_embedding_config_manager, get_embedding_provider, ) -from app.services.embedding.ollama_provider import OllamaEmbeddingProvider -from app.services.embedding.openai_provider import OpenAIEmbeddingProvider from app.services.embedding.nomic_provider import ( + EmbeddingTask, NomicEmbeddingProvider, NomicEmbeddingResult, - EmbeddingTask, ) +from app.services.embedding.ollama_provider import OllamaEmbeddingProvider +from app.services.embedding.openai_provider import OpenAIEmbeddingProvider __all__ = [ "EmbeddingConfig", diff --git a/ai-service/app/services/embedding/base.py b/ai-service/app/services/embedding/base.py index cea4e49..9bfb247 100644 --- a/ai-service/app/services/embedding/base.py +++ b/ai-service/app/services/embedding/base.py @@ -123,7 +123,7 @@ class EmbeddingProvider(ABC): class EmbeddingException(Exception): """Exception raised when embedding generation fails.""" - + def __init__(self, message: str, provider: str = "", details: dict[str, Any] | None = None): self.provider = provider self.details = details or {} diff --git a/ai-service/app/services/embedding/factory.py b/ai-service/app/services/embedding/factory.py index 90c7285..8b4d338 100644 --- a/ai-service/app/services/embedding/factory.py +++ b/ai-service/app/services/embedding/factory.py @@ -10,12 +10,12 @@ Design reference: progress.md Section 7.1 - Architecture import json import logging from pathlib import Path -from typing import Any, Type +from typing import Any from app.services.embedding.base import EmbeddingException, EmbeddingProvider +from app.services.embedding.nomic_provider import NomicEmbeddingProvider from app.services.embedding.ollama_provider import OllamaEmbeddingProvider from app.services.embedding.openai_provider import OpenAIEmbeddingProvider -from app.services.embedding.nomic_provider import 
NomicEmbeddingProvider logger = logging.getLogger(__name__) @@ -28,14 +28,14 @@ class EmbeddingProviderFactory: [AC-AISVC-30] Supports dynamic loading based on configuration. """ - _providers: dict[str, Type[EmbeddingProvider]] = { + _providers: dict[str, type[EmbeddingProvider]] = { "ollama": OllamaEmbeddingProvider, "openai": OpenAIEmbeddingProvider, "nomic": NomicEmbeddingProvider, } @classmethod - def register_provider(cls, name: str, provider_class: Type[EmbeddingProvider]) -> None: + def register_provider(cls, name: str, provider_class: type[EmbeddingProvider]) -> None: """ Register a new embedding provider. [AC-AISVC-30] Allows runtime registration of providers. @@ -62,24 +62,24 @@ class EmbeddingProviderFactory: f"Unknown provider: {name}", provider="factory" ) - + provider_class = cls._providers[name] temp_instance = provider_class.__new__(provider_class) - + display_names = { "ollama": "Ollama 本地模型", "openai": "OpenAI Embedding", "nomic": "Nomic Embed (优化版)", } - + descriptions = { "ollama": "使用 Ollama 运行的本地嵌入模型,支持 nomic-embed-text 等开源模型", "openai": "使用 OpenAI 官方 Embedding API,支持 text-embedding-3 系列模型", "nomic": "Nomic-embed-text v1.5 优化版,支持任务前缀和 Matryoshka 维度截断,专为RAG优化", } - + raw_schema = temp_instance.get_config_schema() - + properties = {} required = [] for key, field in raw_schema.items(): @@ -97,14 +97,14 @@ class EmbeddingProviderFactory: properties[key]["maximum"] = field.get("maximum") if field.get("required"): required.append(key) - + config_schema = { "type": "object", "properties": properties, } if required: config_schema["required"] = required - + return { "name": name, "display_name": display_names.get(name, name), @@ -121,14 +121,14 @@ class EmbeddingProviderFactory: """ Create an embedding provider instance. [AC-AISVC-30] Creates provider based on configuration. 
- + Args: name: Provider identifier (e.g., "ollama", "openai") config: Provider-specific configuration - + Returns: Configured EmbeddingProvider instance - + Raises: EmbeddingException: If provider is unknown or configuration is invalid """ @@ -138,9 +138,9 @@ class EmbeddingProviderFactory: f"Available: {cls.get_available_providers()}", provider="factory" ) - + provider_class = cls._providers[name] - + try: instance = provider_class(**config) logger.info(f"Created embedding provider: {name}") @@ -169,14 +169,14 @@ class EmbeddingConfigManager: self._provider_name = default_provider self._config = self._default_config.copy() self._provider: EmbeddingProvider | None = None - + self._load_from_file() def _load_from_file(self) -> None: """Load configuration from file if exists.""" try: if EMBEDDING_CONFIG_FILE.exists(): - with open(EMBEDDING_CONFIG_FILE, 'r', encoding='utf-8') as f: + with open(EMBEDDING_CONFIG_FILE, encoding='utf-8') as f: saved = json.load(f) self._provider_name = saved.get("provider", self._default_provider) self._config = saved.get("config", self._default_config.copy()) @@ -235,38 +235,38 @@ class EmbeddingConfigManager: """ Update embedding configuration. [AC-AISVC-31, AC-AISVC-40] Supports hot-reload with persistence. 
- + Args: provider: New provider name config: New provider configuration - + Returns: True if update was successful - + Raises: EmbeddingException: If configuration is invalid """ old_provider = self._provider_name old_config = self._config.copy() - + try: new_provider_instance = EmbeddingProviderFactory.create_provider( provider, config ) - + if self._provider: await self._provider.close() - + self._provider_name = provider self._config = config self._provider = new_provider_instance - + self._save_to_file() - + logger.info(f"Updated embedding config: provider={provider}") return True - + except Exception as e: self._provider_name = old_provider self._config = old_config @@ -285,39 +285,39 @@ class EmbeddingConfigManager: """ Test embedding connection. [AC-AISVC-41] Tests provider connectivity. - + Args: test_text: Text to embed for testing provider: Provider to test (uses current if None) config: Config to test (uses current if None) - + Returns: Dict with test results including success, dimension, latency """ import time - + test_provider_name = provider or self._provider_name test_config = config or self._config - + try: test_provider = EmbeddingProviderFactory.create_provider( test_provider_name, test_config ) - + start_time = time.perf_counter() embedding = await test_provider.embed(test_text) latency_ms = (time.perf_counter() - start_time) * 1000 - + await test_provider.close() - + return { "success": True, "dimension": len(embedding), "latency_ms": latency_ms, "message": f"连接成功,向量维度: {len(embedding)}", } - + except Exception as e: return { "success": False, @@ -346,7 +346,7 @@ def get_embedding_config_manager() -> EmbeddingConfigManager: if _embedding_config_manager is None: from app.core.config import get_settings settings = get_settings() - + _embedding_config_manager = EmbeddingConfigManager( default_provider="nomic", default_config={ diff --git a/ai-service/app/services/embedding/nomic_provider.py b/ai-service/app/services/embedding/nomic_provider.py 
index cd19825..247766b 100644 --- a/ai-service/app/services/embedding/nomic_provider.py +++ b/ai-service/app/services/embedding/nomic_provider.py @@ -15,7 +15,6 @@ import httpx import numpy as np from app.services.embedding.base import ( - EmbeddingConfig, EmbeddingException, EmbeddingProvider, ) @@ -45,12 +44,12 @@ class NomicEmbeddingResult: class NomicEmbeddingProvider(EmbeddingProvider): """ Nomic-embed-text v1.5 embedding provider with task prefixes. - + Key features: - Task prefixes: search_document: for documents, search_query: for queries - Matryoshka dimension truncation: 256/512/768 dimensions - Automatic normalization after truncation - + Reference: rag-optimization/spec.md Section 2.1, 2.3 """ @@ -87,7 +86,7 @@ class NomicEmbeddingProvider(EmbeddingProvider): prefix = self.DOCUMENT_PREFIX else: prefix = self.QUERY_PREFIX - + if text.startswith(prefix): return text return f"{prefix}{text}" @@ -98,12 +97,12 @@ class NomicEmbeddingProvider(EmbeddingProvider): Matryoshka representation learning allows dimension truncation. """ truncated = embedding[:target_dim] - + arr = np.array(truncated, dtype=np.float32) norm = np.linalg.norm(arr) if norm > 0: arr = arr / norm - + return arr.tolist() async def embed_with_task( @@ -113,18 +112,18 @@ class NomicEmbeddingProvider(EmbeddingProvider): ) -> NomicEmbeddingResult: """ Generate embedding with specified task prefix. 
- + Args: text: Input text to embed task: DOCUMENT for indexing, QUERY for retrieval - + Returns: NomicEmbeddingResult with all dimension variants """ start_time = time.perf_counter() - + prefixed_text = self._add_prefix(text, task) - + try: client = await self._get_client() response = await client.post( @@ -137,25 +136,25 @@ class NomicEmbeddingProvider(EmbeddingProvider): response.raise_for_status() data = response.json() embedding = data.get("embedding", []) - + if not embedding: raise EmbeddingException( "Empty embedding returned", provider=self.PROVIDER_NAME, details={"text_length": len(text), "task": task.value} ) - + latency_ms = (time.perf_counter() - start_time) * 1000 - + embedding_256 = self._truncate_and_normalize(embedding, 256) embedding_512 = self._truncate_and_normalize(embedding, 512) embedding_full = self._truncate_and_normalize(embedding, len(embedding)) - + logger.debug( f"Generated Nomic embedding: task={task.value}, " f"dim={len(embedding)}, latency={latency_ms:.2f}ms" ) - + return NomicEmbeddingResult( embedding_full=embedding_full, embedding_256=embedding_256, @@ -165,7 +164,7 @@ class NomicEmbeddingProvider(EmbeddingProvider): task=task, latency_ms=latency_ms, ) - + except httpx.HTTPStatusError as e: raise EmbeddingException( f"Ollama API error: {e.response.status_code}", diff --git a/ai-service/app/services/embedding/ollama_embedding.py b/ai-service/app/services/embedding/ollama_embedding.py index 36938e3..eb053dc 100644 --- a/ai-service/app/services/embedding/ollama_embedding.py +++ b/ai-service/app/services/embedding/ollama_embedding.py @@ -4,7 +4,9 @@ Uses nomic-embed-text model via Ollama API. """ import logging + import httpx + from app.core.config import get_settings logger = logging.getLogger(__name__) @@ -15,7 +17,7 @@ async def get_embedding(text: str) -> list[float]: Generate embedding vector for text using Ollama nomic-embed-text model. 
""" settings = get_settings() - + async with httpx.AsyncClient(timeout=60.0) as client: try: response = await client.post( @@ -28,14 +30,14 @@ async def get_embedding(text: str) -> list[float]: response.raise_for_status() data = response.json() embedding = data.get("embedding", []) - + if not embedding: logger.warning(f"Empty embedding returned for text length={len(text)}") return [0.0] * settings.qdrant_vector_size - + logger.debug(f"Generated embedding: dim={len(embedding)}") return embedding - + except httpx.HTTPStatusError as e: logger.error(f"Ollama API error: {e.response.status_code} - {e.response.text}") raise diff --git a/ai-service/app/services/embedding/ollama_provider.py b/ai-service/app/services/embedding/ollama_provider.py index c57b0a4..f86aa47 100644 --- a/ai-service/app/services/embedding/ollama_provider.py +++ b/ai-service/app/services/embedding/ollama_provider.py @@ -12,7 +12,6 @@ from typing import Any import httpx from app.services.embedding.base import ( - EmbeddingConfig, EmbeddingException, EmbeddingProvider, ) @@ -54,7 +53,7 @@ class OllamaEmbeddingProvider(EmbeddingProvider): [AC-AISVC-29] Returns embedding vector. 
""" start_time = time.perf_counter() - + try: client = await self._get_client() response = await client.post( @@ -67,22 +66,22 @@ class OllamaEmbeddingProvider(EmbeddingProvider): response.raise_for_status() data = response.json() embedding = data.get("embedding", []) - + if not embedding: raise EmbeddingException( "Empty embedding returned", provider=self.PROVIDER_NAME, details={"text_length": len(text)} ) - + latency_ms = (time.perf_counter() - start_time) * 1000 logger.debug( f"Generated embedding via Ollama: dim={len(embedding)}, " f"latency={latency_ms:.2f}ms" ) - + return embedding - + except httpx.HTTPStatusError as e: raise EmbeddingException( f"Ollama API error: {e.response.status_code}", diff --git a/ai-service/app/services/embedding/openai_provider.py b/ai-service/app/services/embedding/openai_provider.py index 31b4a00..120ccb6 100644 --- a/ai-service/app/services/embedding/openai_provider.py +++ b/ai-service/app/services/embedding/openai_provider.py @@ -48,7 +48,7 @@ class OpenAIEmbeddingProvider(EmbeddingProvider): self._timeout = timeout_seconds self._client: httpx.AsyncClient | None = None self._extra_config = kwargs - + if dimension: self._dimension = dimension elif model in self.MODEL_DIMENSIONS: @@ -75,17 +75,17 @@ class OpenAIEmbeddingProvider(EmbeddingProvider): [AC-AISVC-29] Supports batch embedding for efficiency. 
""" start_time = time.perf_counter() - + try: client = await self._get_client() - + request_body: dict[str, Any] = { "model": self._model, "input": texts, } if self._dimension and self._model.startswith("text-embedding-3"): request_body["dimensions"] = self._dimension - + response = await client.post( f"{self._base_url}/embeddings", headers={ @@ -96,7 +96,7 @@ class OpenAIEmbeddingProvider(EmbeddingProvider): ) response.raise_for_status() data = response.json() - + embeddings = [] for item in data.get("data", []): embedding = item.get("embedding", []) @@ -107,22 +107,22 @@ class OpenAIEmbeddingProvider(EmbeddingProvider): details={"index": item.get("index", 0)} ) embeddings.append(embedding) - + if len(embeddings) != len(texts): raise EmbeddingException( f"Embedding count mismatch: expected {len(texts)}, got {len(embeddings)}", provider=self.PROVIDER_NAME ) - + latency_ms = (time.perf_counter() - start_time) * 1000 logger.debug( f"Generated {len(embeddings)} embeddings via OpenAI: " f"dim={len(embeddings[0]) if embeddings else 0}, " f"latency={latency_ms:.2f}ms" ) - + return embeddings - + except httpx.HTTPStatusError as e: raise EmbeddingException( f"OpenAI API error: {e.response.status_code}", diff --git a/ai-service/app/services/flow/__init__.py b/ai-service/app/services/flow/__init__.py index e3acdcf..79d6b9e 100644 --- a/ai-service/app/services/flow/__init__.py +++ b/ai-service/app/services/flow/__init__.py @@ -3,7 +3,7 @@ Flow services for AI Service. [AC-AISVC-71~AC-AISVC-77] Script flow management and execution engine. 
""" -from app.services.flow.flow_service import ScriptFlowService from app.services.flow.engine import FlowEngine +from app.services.flow.flow_service import ScriptFlowService __all__ = ["ScriptFlowService", "FlowEngine"] diff --git a/ai-service/app/services/flow/engine.py b/ai-service/app/services/flow/engine.py index 46bb356..5b23fb1 100644 --- a/ai-service/app/services/flow/engine.py +++ b/ai-service/app/services/flow/engine.py @@ -14,10 +14,10 @@ from sqlalchemy.ext.asyncio import AsyncSession from sqlmodel import col from app.models.entities import ( - ScriptFlow, + FlowAdvanceResult, FlowInstance, FlowInstanceStatus, - FlowAdvanceResult, + ScriptFlow, TimeoutAction, ) @@ -27,14 +27,14 @@ logger = logging.getLogger(__name__) class FlowEngine: """ [AC-AISVC-74~AC-AISVC-77] State machine engine for script flow execution. - + State Machine: - IDLE: No active flow - ACTIVE: Flow is being executed - COMPLETED: Flow finished successfully - TIMEOUT: Flow timed out - CANCELLED: Flow was cancelled - + Core Methods: - check_active_flow: Check if session has active flow - start: Start a new flow instance @@ -52,11 +52,11 @@ class FlowEngine: ) -> FlowInstance | None: """ [AC-AISVC-75] Check if session has an active flow instance. - + Args: tenant_id: Tenant ID for isolation session_id: Session ID to check - + Returns: Active FlowInstance or None """ @@ -76,12 +76,12 @@ class FlowEngine: ) -> tuple[FlowInstance | None, str | None]: """ [AC-AISVC-74] Start a new flow instance and return first step content. - + Args: tenant_id: Tenant ID for isolation session_id: Session ID for the conversation flow_id: ID of the flow to start - + Returns: Tuple of (FlowInstance, first_step_content) or (None, error_message) """ @@ -141,12 +141,12 @@ class FlowEngine: ) -> FlowAdvanceResult: """ [AC-AISVC-75, AC-AISVC-76] Advance flow based on user input. 
- + Args: tenant_id: Tenant ID for isolation session_id: Session ID for the conversation user_input: User's input message - + Returns: FlowAdvanceResult with completion status and next message """ @@ -168,11 +168,11 @@ class FlowEngine: return FlowAdvanceResult(completed=True, message=None) current_step = flow.steps[current_step_idx] - + self._record_input(instance, user_input) next_step_no = self._match_next_step(current_step, user_input) - + if next_step_no is None: default_next = current_step.get("default_next") if default_next: @@ -223,11 +223,11 @@ class FlowEngine: ) -> FlowAdvanceResult: """ [AC-AISVC-77] Handle timeout for current step. - + Args: tenant_id: Tenant ID for isolation session_id: Session ID for the conversation - + Returns: FlowAdvanceResult based on timeout_action configuration """ @@ -268,7 +268,7 @@ class FlowEngine: instance.current_step = default_next instance.updated_at = datetime.utcnow() await self._session.flush() - + next_step = flow.steps[default_next - 1] return FlowAdvanceResult( completed=False, @@ -286,7 +286,7 @@ class FlowEngine: instance.completed_at = datetime.utcnow() instance.updated_at = datetime.utcnow() await self._session.flush() - + return FlowAdvanceResult( completed=True, message="抱歉,等待超时,正在为您转接人工客服...", @@ -330,7 +330,7 @@ class FlowEngine: return None flow = await self._get_flow_by_id(instance.flow_id) - + return { "instance_id": str(instance.id), "flow_id": str(instance.flow_id), @@ -372,11 +372,11 @@ class FlowEngine: ) -> int | None: """ Match user input against next_conditions. 
- + Args: step: Current step definition user_input: User's input message - + Returns: goto_step number if matched, None otherwise """ @@ -410,7 +410,7 @@ class FlowEngine: """Record user input in flow context.""" if instance.context is None: instance.context = {"inputs": []} - + inputs = instance.context.get("inputs", []) inputs.append({ "step": instance.current_step, diff --git a/ai-service/app/services/flow/flow_service.py b/ai-service/app/services/flow/flow_service.py index 7fed5af..74fffe0 100644 --- a/ai-service/app/services/flow/flow_service.py +++ b/ai-service/app/services/flow/flow_service.py @@ -5,10 +5,11 @@ Script Flow Service for AI Service. import logging import uuid +from collections.abc import Sequence from datetime import datetime -from typing import Any, Sequence +from typing import Any -from sqlalchemy import select, func +from sqlalchemy import func, select from sqlalchemy.ext.asyncio import AsyncSession from sqlmodel import col @@ -16,7 +17,6 @@ from app.models.entities import ( ScriptFlow, ScriptFlowCreate, ScriptFlowUpdate, - FlowInstanceStatus, ) logger = logging.getLogger(__name__) @@ -25,7 +25,7 @@ logger = logging.getLogger(__name__) class ScriptFlowService: """ [AC-AISVC-71~AC-AISVC-73] Service for managing script flow definitions. - + Features: - Flow CRUD with tenant isolation - Step validation @@ -44,7 +44,7 @@ class ScriptFlowService: [AC-AISVC-71] Create a new script flow with steps. 
""" self._validate_steps(create_data.steps) - + flow = ScriptFlow( tenant_id=tenant_id, name=create_data.name, @@ -72,10 +72,10 @@ class ScriptFlowService: stmt = select(ScriptFlow).where( ScriptFlow.tenant_id == tenant_id ) - + if is_enabled is not None: stmt = stmt.where(ScriptFlow.is_enabled == is_enabled) - + stmt = stmt.order_by(col(ScriptFlow.created_at).desc()) result = await self._session.execute(stmt) return result.scalars().all() @@ -146,7 +146,7 @@ class ScriptFlowService: flow.updated_at = datetime.utcnow() await self._session.flush() - + logger.info( f"[AC-AISVC-73] Updated script flow: tenant={tenant_id}, id={flow_id}" ) @@ -164,7 +164,7 @@ class ScriptFlowService: await self._session.delete(flow) await self._session.flush() - + logger.info( f"Deleted script flow: tenant={tenant_id}, id={flow_id}" ) @@ -186,7 +186,7 @@ class ScriptFlowService: ) -> int: """Get count of intent rules linked to this flow.""" from app.models.entities import IntentRule - + stmt = select(func.count()).select_from(IntentRule).where( IntentRule.tenant_id == tenant_id, IntentRule.flow_id == flow_id, @@ -198,7 +198,7 @@ class ScriptFlowService: """Validate step definitions.""" if not steps: raise ValueError("Flow must have at least one step") - + step_nos = set() for step in steps: step_no = step.get("step_no") @@ -207,10 +207,10 @@ class ScriptFlowService: if step_no in step_nos: raise ValueError(f"Duplicate step_no: {step_no}") step_nos.add(step_no) - + if not step.get("content"): raise ValueError(f"Step {step_no} must have content") - + next_conditions = step.get("next_conditions", []) for cond in next_conditions: if cond.get("goto_step") is None: diff --git a/ai-service/app/services/intent/__init__.py b/ai-service/app/services/intent/__init__.py index 151c895..a868fab 100644 --- a/ai-service/app/services/intent/__init__.py +++ b/ai-service/app/services/intent/__init__.py @@ -3,8 +3,8 @@ Intent recognition and routing services. 
[AC-AISVC-65~AC-AISVC-70] Intent rule management and matching engine. """ +from app.services.intent.router import IntentMatchResult, IntentRouter from app.services.intent.rule_service import IntentRuleService -from app.services.intent.router import IntentRouter, IntentMatchResult __all__ = [ "IntentRuleService", diff --git a/ai-service/app/services/intent/router.py b/ai-service/app/services/intent/router.py index 218a560..9007521 100644 --- a/ai-service/app/services/intent/router.py +++ b/ai-service/app/services/intent/router.py @@ -41,7 +41,7 @@ class IntentMatchResult: class IntentRouter: """ [AC-AISVC-69] Intent matching engine. - + Matching algorithm: 1. Load rules ordered by priority DESC 2. For each rule, try keyword matching first @@ -60,11 +60,11 @@ class IntentRouter: ) -> IntentMatchResult | None: """ [AC-AISVC-69] Match user message against intent rules. - + Args: message: User input message rules: List of enabled rules ordered by priority DESC - + Returns: IntentMatchResult if matched, None otherwise """ @@ -93,7 +93,7 @@ class IntentRouter: ) return regex_result - logger.debug(f"[AC-AISVC-70] No intent matched, will fallback to default RAG") + logger.debug("[AC-AISVC-70] No intent matched, will fallback to default RAG") return None def _match_keywords( @@ -160,7 +160,7 @@ class IntentRouter: ) -> tuple[IntentMatchResult | None, str | None]: """ [AC-AISVC-69] Match and return rule_id for statistics update. - + Returns: Tuple of (match_result, rule_id_for_stats) """ diff --git a/ai-service/app/services/intent/rule_service.py b/ai-service/app/services/intent/rule_service.py index 179adca..2b3bbf4 100644 --- a/ai-service/app/services/intent/rule_service.py +++ b/ai-service/app/services/intent/rule_service.py @@ -6,8 +6,9 @@ Intent rule service for AI Service. 
import logging import time import uuid +from collections.abc import Sequence from datetime import datetime -from typing import Any, Sequence +from typing import Any from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession @@ -63,7 +64,7 @@ _rule_cache = RuleCache() class IntentRuleService: """ [AC-AISVC-65~AC-AISVC-68] Service for managing intent rules. - + Features: - Rule CRUD with tenant isolation - Hit count statistics diff --git a/ai-service/app/services/kb.py b/ai-service/app/services/kb.py index 8f5535a..f2861e5 100644 --- a/ai-service/app/services/kb.py +++ b/ai-service/app/services/kb.py @@ -6,10 +6,10 @@ Knowledge Base service for AI Service. import logging import os import uuid +from collections.abc import Sequence from datetime import datetime -from typing import Sequence -from sqlalchemy import select, func +from sqlalchemy import func, select from sqlalchemy.ext.asyncio import AsyncSession from sqlmodel import col diff --git a/ai-service/app/services/llm/base.py b/ai-service/app/services/llm/base.py index cf46d3c..787c2db 100644 --- a/ai-service/app/services/llm/base.py +++ b/ai-service/app/services/llm/base.py @@ -8,8 +8,9 @@ Design reference: design.md Section 8.1 - LLMClient interface """ from abc import ABC, abstractmethod +from collections.abc import AsyncGenerator from dataclasses import dataclass, field -from typing import Any, AsyncGenerator +from typing import Any @dataclass diff --git a/ai-service/app/services/llm/factory.py b/ai-service/app/services/llm/factory.py index 25f1073..c802792 100644 --- a/ai-service/app/services/llm/factory.py +++ b/ai-service/app/services/llm/factory.py @@ -7,7 +7,7 @@ Design pattern: Factory pattern for pluggable LLM providers. 
import json import logging -from dataclasses import dataclass, field +from dataclasses import dataclass from pathlib import Path from typing import Any @@ -266,9 +266,9 @@ class LLMConfigManager: def __init__(self): from app.core.config import get_settings - + settings = get_settings() - + self._current_provider: str = settings.llm_provider self._current_config: dict[str, Any] = { "api_key": settings.llm_api_key, @@ -278,14 +278,14 @@ class LLMConfigManager: "temperature": settings.llm_temperature, } self._client: LLMClient | None = None - + self._load_from_file() def _load_from_file(self) -> None: """Load configuration from file if exists.""" try: if LLM_CONFIG_FILE.exists(): - with open(LLM_CONFIG_FILE, 'r', encoding='utf-8') as f: + with open(LLM_CONFIG_FILE, encoding='utf-8') as f: saved = json.load(f) self._current_provider = saved.get("provider", self._current_provider) saved_config = saved.get("config", {}) @@ -343,7 +343,7 @@ class LLMConfigManager: self._current_provider = provider self._current_config = validated_config - + self._save_to_file() logger.info(f"[AC-ASA-16] LLM config updated: provider={provider}") @@ -357,7 +357,7 @@ class LLMConfigManager: """Validate configuration against provider schema.""" schema_props = provider_info.config_schema.get("properties", {}) required_fields = provider_info.config_schema.get("required", []) - + validated = {} for key, prop_schema in schema_props.items(): if key in config: @@ -399,7 +399,7 @@ class LLMConfigManager: test_provider = provider or self._current_provider test_config = config if config else self._current_config - + logger.info(f"[AC-ASA-17] Test connection: provider={test_provider}, model={test_config.get('model')}") if test_provider not in LLM_PROVIDERS: diff --git a/ai-service/app/services/llm/openai_client.py b/ai-service/app/services/llm/openai_client.py index fc36959..859ed08 100644 --- a/ai-service/app/services/llm/openai_client.py +++ b/ai-service/app/services/llm/openai_client.py @@ -9,7 +9,8 
@@ Design reference: design.md Section 8.1 - LLMClient interface import json import logging -from typing import Any, AsyncGenerator +from collections.abc import AsyncGenerator +from typing import Any import httpx from tenacity import ( @@ -20,7 +21,7 @@ from tenacity import ( ) from app.core.config import get_settings -from app.core.exceptions import AIServiceException, ErrorCode, ServiceUnavailableException, TimeoutException +from app.core.exceptions import AIServiceException, ErrorCode, TimeoutException from app.services.llm.base import LLMClient, LLMConfig, LLMResponse, LLMStreamChunk logger = logging.getLogger(__name__) @@ -133,13 +134,13 @@ class OpenAIClient(LLMClient): body = self._build_request_body(messages, effective_config, stream=False, **kwargs) logger.info(f"[AC-AISVC-02] Generating response with model={effective_config.model}") - logger.info(f"[AC-AISVC-02] ========== FULL PROMPT TO AI ==========") + logger.info("[AC-AISVC-02] ========== FULL PROMPT TO AI ==========") for i, msg in enumerate(messages): role = msg.get("role", "unknown") content = msg.get("content", "") logger.info(f"[AC-AISVC-02] [{i}] role={role}, content_length={len(content)}") logger.info(f"[AC-AISVC-02] [{i}] content:\n{content}") - logger.info(f"[AC-AISVC-02] ======================================") + logger.info("[AC-AISVC-02] ======================================") try: response = await client.post( @@ -220,13 +221,13 @@ class OpenAIClient(LLMClient): body = self._build_request_body(messages, effective_config, stream=True, **kwargs) logger.info(f"[AC-AISVC-06] Starting streaming generation with model={effective_config.model}") - logger.info(f"[AC-AISVC-06] ========== FULL PROMPT TO AI (STREAMING) ==========") + logger.info("[AC-AISVC-06] ========== FULL PROMPT TO AI (STREAMING) ==========") for i, msg in enumerate(messages): role = msg.get("role", "unknown") content = msg.get("content", "") logger.info(f"[AC-AISVC-06] [{i}] role={role}, content_length={len(content)}") 
logger.info(f"[AC-AISVC-06] [{i}] content:\n{content}") - logger.info(f"[AC-AISVC-06] ======================================") + logger.info("[AC-AISVC-06] ======================================") try: async with client.stream( @@ -263,7 +264,7 @@ class OpenAIClient(LLMClient): details=[{"status_code": e.response.status_code, "response": error_detail}], ) - logger.info(f"[AC-AISVC-06] Streaming generation completed") + logger.info("[AC-AISVC-06] Streaming generation completed") def _parse_stream_chunk( self, diff --git a/ai-service/app/services/memory.py b/ai-service/app/services/memory.py index 5db74f5..2c70935 100644 --- a/ai-service/app/services/memory.py +++ b/ai-service/app/services/memory.py @@ -4,13 +4,13 @@ Memory service for AI Service. """ import logging -from typing import Sequence +from collections.abc import Sequence from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession from sqlmodel import col -from app.models.entities import ChatMessage, ChatMessageCreate, ChatSession, ChatSessionCreate +from app.models.entities import ChatMessage, ChatSession logger = logging.getLogger(__name__) diff --git a/ai-service/app/services/monitoring/prompt_monitor.py b/ai-service/app/services/monitoring/prompt_monitor.py index 0dc2d4b..e7c3336 100644 --- a/ai-service/app/services/monitoring/prompt_monitor.py +++ b/ai-service/app/services/monitoring/prompt_monitor.py @@ -165,24 +165,24 @@ class PromptMonitor: select(PromptTemplateVersion) .where( PromptTemplateVersion.template_id == template_id, - PromptTemplateVersion.status == TemplateVersionStatus.PUBLISHED.value, ) + .order_by(PromptTemplateVersion.version.desc()) ) version_result = await self._session.execute(version_stmt) - published_version = version_result.scalar_one_or_none() + latest_version = version_result.scalar_one_or_none() - if not published_version: + if not latest_version: return None resolver = VariableResolver() merged_variables = self._merge_variables( - 
published_version.variables or [], + latest_version.variables or [], variables or {}, ) rendered_content = resolver.resolve( - published_version.system_instruction, - published_version.variables, + latest_version.system_instruction, + latest_version.variables, merged_variables, ) @@ -214,8 +214,8 @@ class PromptMonitor: return PromptPreviewResult( template_id=str(template.id), template_name=template.name, - version=published_version.version, - raw_content=published_version.system_instruction, + version=latest_version.version, + raw_content=latest_version.system_instruction, variables=variable_list, rendered_content=rendered_content, estimated_tokens=token_count.total, diff --git a/ai-service/app/services/orchestrator.py b/ai-service/app/services/orchestrator.py index 9b3d418..8e63209 100644 --- a/ai-service/app/services/orchestrator.py +++ b/ai-service/app/services/orchestrator.py @@ -2,15 +2,19 @@ Orchestrator service for AI Service. [AC-AISVC-01, AC-AISVC-02, AC-AISVC-06, AC-AISVC-07] Core orchestration logic for chat generation. -Design reference: design.md Section 2.2 - 关键数据流 -1. Memory.load(tenantId, sessionId) -2. merge_context(local_history, external_history) -3. Retrieval.retrieve(query, tenantId, channelType, metadata) -4. build_prompt(merged_history, retrieved_docs, currentMessage) -5. LLM.generate(...) (non-streaming) or LLM.stream_generate(...) (streaming) -6. compute_confidence(...) -7. Memory.append(tenantId, sessionId, user/assistant messages) -8. Return ChatResponse (or output via SSE) +Design reference: design.md Section 10 - Orchestrator升级为12步pipeline +1. InputScanner: Scan user input for forbidden words (logging only) +2. FlowEngine: Check if session has active script flow +3. IntentRouter: Match intent rules and route to appropriate handler +4. QueryRewriter: (Optional, skipped in MVP) Rewrite query for better retrieval +5. Multi-KB Retrieval: Retrieve from target knowledge bases +6. ResultRanker: Rank results by KB type priority +7. 
PromptBuilder: Load template + inject behavior rules +8. LLM.generate: Generate response +9. OutputFilter: Filter forbidden words in output +10. Confidence: Calculate confidence score +11. Memory: Save messages +12. Return: Build and return ChatResponse RAG Optimization (rag-optimization/spec.md): - Two-stage retrieval with Matryoshka dimensions @@ -19,24 +23,36 @@ RAG Optimization (rag-optimization/spec.md): """ import logging +import uuid +from collections.abc import AsyncGenerator from dataclasses import dataclass, field -from typing import Any, AsyncGenerator +from typing import Any from sse_starlette.sse import ServerSentEvent from app.core.config import get_settings +from app.core.database import get_session from app.core.prompts import SYSTEM_PROMPT, format_evidence_for_prompt from app.core.sse import ( + SSEStateMachine, create_error_event, create_final_event, create_message_event, - SSEStateMachine, ) from app.models import ChatRequest, ChatResponse from app.services.confidence import ConfidenceCalculator, ConfidenceResult from app.services.context import ContextMerger, MergedContext +from app.services.flow.engine import FlowEngine +from app.services.guardrail.behavior_service import BehaviorRuleService +from app.services.guardrail.input_scanner import InputScanner +from app.services.guardrail.output_filter import OutputFilter +from app.services.guardrail.word_service import ForbiddenWordService +from app.services.intent.router import IntentRouter +from app.services.intent.rule_service import IntentRuleService from app.services.llm.base import LLMClient, LLMConfig, LLMResponse from app.services.memory import MemoryService +from app.services.prompt.template_service import PromptTemplateService +from app.services.prompt.variable_resolver import VariableResolver from app.services.retrieval.base import BaseRetriever, RetrievalContext, RetrievalResult logger = logging.getLogger(__name__) @@ -60,6 +76,20 @@ class GenerationContext: """ [AC-AISVC-01, AC-AISVC-02] 
Context accumulated during generation pipeline. Contains all intermediate results for diagnostics and response building. + + 12-Step Pipeline tracking: + 1. input_scan_result: InputScanner result + 2. active_flow: Active FlowInstance if exists + 3. intent_match: IntentMatchResult if matched + 4. query_rewritten: Rewritten query (optional) + 5. retrieval_result: Multi-KB retrieval result + 6. ranked_results: Ranked retrieval results + 7. system_prompt: Built system prompt with template + behavior rules + 8. llm_response: LLM generation result + 9. filtered_reply: Output after forbidden word filtering + 10. confidence_result: Confidence calculation result + 11. messages_saved: Whether messages were saved + 12. final_response: Final ChatResponse """ tenant_id: str session_id: str @@ -67,19 +97,45 @@ class GenerationContext: channel_type: str request_metadata: dict[str, Any] | None = None + # Original pipeline fields local_history: list[dict[str, str]] = field(default_factory=list) merged_context: MergedContext | None = None retrieval_result: RetrievalResult | None = None llm_response: LLMResponse | None = None confidence_result: ConfidenceResult | None = None + # Phase 10-14 new pipeline fields + input_scan_result: Any = None # InputScanResult + active_flow: Any = None # FlowInstance + intent_match: Any = None # IntentMatchResult + query_rewritten: str | None = None + ranked_results: list[Any] = field(default_factory=list) + system_prompt: str | None = None + filtered_reply: str | None = None + target_kb_ids: list[str] | None = None + behavior_rules: list[str] = field(default_factory=list) + diagnostics: dict[str, Any] = field(default_factory=dict) class OrchestratorService: """ [AC-AISVC-01, AC-AISVC-02, AC-AISVC-06, AC-AISVC-07] Orchestrator for chat generation. - Coordinates memory, retrieval, and LLM components. + Coordinates memory, retrieval, LLM, and guardrail components. + + 12-Step Pipeline (design.md Section 10): + 1. 
InputScanner: Scan user input for forbidden words + 2. FlowEngine: Check if session has active script flow + 3. IntentRouter: Match intent rules and route + 4. QueryRewriter: (Optional, skipped in MVP) + 5. Multi-KB Retrieval: Retrieve from target knowledge bases + 6. ResultRanker: Rank results by KB type priority + 7. PromptBuilder: Load template + inject behavior rules + 8. LLM.generate: Generate response + 9. OutputFilter: Filter forbidden words in output + 10. Confidence: Calculate confidence score + 11. Memory: Save messages + 12. Return: Build and return ChatResponse SSE Event Flow (per design.md Section 6.2): - message* (0 or more) -> final (exactly 1) -> close @@ -94,6 +150,15 @@ class OrchestratorService: context_merger: ContextMerger | None = None, confidence_calculator: ConfidenceCalculator | None = None, config: OrchestratorConfig | None = None, + # Phase 10-14 new services + input_scanner: InputScanner | None = None, + intent_router: IntentRouter | None = None, + intent_rule_service: IntentRuleService | None = None, + flow_engine: FlowEngine | None = None, + prompt_template_service: PromptTemplateService | None = None, + variable_resolver: VariableResolver | None = None, + behavior_rule_service: BehaviorRuleService | None = None, + output_filter: OutputFilter | None = None, ): """ Initialize orchestrator with optional dependencies for DI. 
@@ -105,6 +170,14 @@ class OrchestratorService: context_merger: Context merger for history deduplication confidence_calculator: Confidence calculator for response scoring config: Orchestrator configuration + input_scanner: Input scanner for forbidden word detection + intent_router: Intent router for rule matching + intent_rule_service: Intent rule service for loading rules + flow_engine: Flow engine for script flow execution + prompt_template_service: Prompt template service for template loading + variable_resolver: Variable resolver for template variable substitution + behavior_rule_service: Behavior rule service for loading behavior rules + output_filter: Output filter for forbidden word filtering """ settings = get_settings() self._llm_client = llm_client @@ -121,6 +194,16 @@ class OrchestratorService: ) self._llm_config: LLMConfig | None = None + # Phase 10-14 services + self._input_scanner = input_scanner + self._intent_router = intent_router or IntentRouter() + self._intent_rule_service = intent_rule_service + self._flow_engine = flow_engine + self._prompt_template_service = prompt_template_service + self._variable_resolver = variable_resolver or VariableResolver() + self._behavior_rule_service = behavior_rule_service + self._output_filter = output_filter + async def generate( self, tenant_id: str, @@ -128,29 +211,27 @@ class OrchestratorService: ) -> ChatResponse: """ Generate a non-streaming response. - [AC-AISVC-01, AC-AISVC-02] Complete generation pipeline. + [AC-AISVC-01, AC-AISVC-02] Complete 12-step generation pipeline. - Pipeline (per design.md Section 2.2): - 1. Load local history from Memory - 2. Merge with external history (dedup + truncate) - 3. RAG retrieval (optional) - 4. Build prompt with context and evidence - 5. LLM generation - 6. Calculate confidence - 7. Save messages to Memory - 8. Return ChatResponse + 12-Step Pipeline (design.md Section 10): + 1. InputScanner: Scan user input for forbidden words + 2. 
FlowEngine: Check if session has active script flow + 3. IntentRouter: Match intent rules and route + 4. QueryRewriter: (Optional, skipped in MVP) + 5. Multi-KB Retrieval: Retrieve from target knowledge bases + 6. ResultRanker: Rank results by KB type priority + 7. PromptBuilder: Load template + inject behavior rules + 8. LLM.generate: Generate response + 9. OutputFilter: Filter forbidden words in output + 10. Confidence: Calculate confidence score + 11. Memory: Save messages + 12. Return: Build and return ChatResponse """ logger.info( - f"[AC-AISVC-01] Starting generation for tenant={tenant_id}, " + f"[AC-AISVC-01] Starting 12-step generation for tenant={tenant_id}, " f"session={request.session_id}, channel_type={request.channel_type}, " f"current_message={request.current_message[:100]}..." ) - logger.info( - f"[AC-AISVC-01] Config: enable_rag={self._config.enable_rag}, " - f"use_optimized_retriever={self._config.use_optimized_retriever}, " - f"llm_client={'configured' if self._llm_client else 'NOT configured'}, " - f"retriever={'configured' if self._retriever else 'NOT configured'}" - ) ctx = GenerationContext( tenant_id=tenant_id, @@ -161,19 +242,42 @@ class OrchestratorService: ) try: - await self._load_local_history(ctx) + # Step 1: InputScanner - Scan user input for forbidden words + await self._scan_input(ctx) + # Load local history and merge context (original pipeline) + await self._load_local_history(ctx) await self._merge_context(ctx, request.history) + # Step 2: FlowEngine - Check if session has active script flow + await self._check_active_flow(ctx) + + # Step 3: IntentRouter - Match intent rules and route + await self._match_intent(ctx) + + # Step 4: QueryRewriter - (Optional, skipped in MVP) + # ctx.query_rewritten = ctx.current_message + + # Step 5-6: Multi-KB Retrieval + ResultRanker if self._config.enable_rag and self._retriever: await self._retrieve_evidence(ctx) + # Step 7: PromptBuilder - Load template + inject behavior rules + await 
self._build_system_prompt(ctx) + + # Step 8: LLM.generate - Generate response await self._generate_response(ctx) + # Step 9: OutputFilter - Filter forbidden words in output + await self._filter_output(ctx) + + # Step 10: Confidence - Calculate confidence score self._calculate_confidence(ctx) + # Step 11: Memory - Save messages await self._save_messages(ctx) + # Step 12: Return - Build and return ChatResponse return self._build_response(ctx) except Exception as e: @@ -219,6 +323,200 @@ class OrchestratorService: logger.warning(f"[AC-AISVC-13] Failed to load history: {e}") ctx.diagnostics["memory_error"] = str(e) + async def _scan_input(self, ctx: GenerationContext) -> None: + """ + [AC-AISVC-83] Step 1: Scan user input for forbidden words (logging only). + """ + if not self._input_scanner: + logger.debug("[AC-AISVC-83] No input scanner configured, skipping") + ctx.diagnostics["input_scan_enabled"] = False + return + + try: + ctx.input_scan_result = await self._input_scanner.scan( + text=ctx.current_message, + tenant_id=ctx.tenant_id, + ) + + ctx.diagnostics["input_scan"] = { + "flagged": ctx.input_scan_result.flagged, + "matched_words": ctx.input_scan_result.matched_words, + "matched_categories": ctx.input_scan_result.matched_categories, + } + + if ctx.input_scan_result.flagged: + logger.info( + f"[AC-AISVC-83] Input flagged: words={ctx.input_scan_result.matched_words}, " + f"categories={ctx.input_scan_result.matched_categories}" + ) + + except Exception as e: + logger.warning(f"[AC-AISVC-83] Input scan failed: {e}") + ctx.diagnostics["input_scan_error"] = str(e) + + async def _check_active_flow(self, ctx: GenerationContext) -> None: + """ + [AC-AISVC-75] Step 2: Check if session has active script flow. + If active flow exists, advance it based on user input. 
+ """ + if not self._flow_engine: + logger.debug("[AC-AISVC-75] No flow engine configured, skipping") + ctx.diagnostics["flow_check_enabled"] = False + return + + try: + ctx.active_flow = await self._flow_engine.check_active_flow( + tenant_id=ctx.tenant_id, + session_id=ctx.session_id, + ) + + if ctx.active_flow: + logger.info( + f"[AC-AISVC-75] Active flow found: flow_id={ctx.active_flow.flow_id}, " + f"current_step={ctx.active_flow.current_step}" + ) + + # Advance the flow based on user input + advance_result = await self._flow_engine.advance( + instance=ctx.active_flow, + user_input=ctx.current_message, + ) + + ctx.diagnostics["flow_advance"] = { + "completed": advance_result.completed, + "has_message": advance_result.message is not None, + } + + # If flow provides a message, use it as the reply and skip LLM + if advance_result.message: + ctx.llm_response = LLMResponse( + content=advance_result.message, + model="script_flow", + usage={}, + finish_reason="flow_step", + ) + ctx.diagnostics["flow_handled"] = True + logger.info(f"[AC-AISVC-75] Flow provided reply, skipping LLM") + + else: + ctx.diagnostics["flow_check_enabled"] = True + ctx.diagnostics["active_flow"] = False + + except Exception as e: + logger.warning(f"[AC-AISVC-75] Flow check failed: {e}") + ctx.diagnostics["flow_check_error"] = str(e) + + async def _match_intent(self, ctx: GenerationContext) -> None: + """ + [AC-AISVC-69, AC-AISVC-70] Step 3: Match intent rules and route. + Routes to: fixed reply, RAG with target KBs, flow start, or transfer. 
+ """ + # Skip if flow already handled the request + if ctx.diagnostics.get("flow_handled"): + logger.info("[AC-AISVC-69] Flow already handled, skipping intent matching") + return + + if not self._intent_rule_service: + logger.debug("[AC-AISVC-69] No intent rule service configured, skipping") + ctx.diagnostics["intent_match_enabled"] = False + return + + try: + # Load enabled rules ordered by priority + async with get_session() as session: + from app.services.intent.rule_service import IntentRuleService + rule_service = IntentRuleService(session) + rules = await rule_service.get_enabled_rules_for_matching(ctx.tenant_id) + + if not rules: + ctx.diagnostics["intent_match_enabled"] = True + ctx.diagnostics["intent_matched"] = False + return + + # Match intent + ctx.intent_match = self._intent_router.match( + message=ctx.current_message, + rules=rules, + ) + + if ctx.intent_match: + logger.info( + f"[AC-AISVC-69] Intent matched: rule={ctx.intent_match.rule.name}, " + f"response_type={ctx.intent_match.rule.response_type}" + ) + + ctx.diagnostics["intent_match"] = ctx.intent_match.to_dict() + + # Increment hit count + async with get_session() as session: + rule_service = IntentRuleService(session) + await rule_service.increment_hit_count( + tenant_id=ctx.tenant_id, + rule_id=ctx.intent_match.rule.id, + ) + + # Route based on response_type + if ctx.intent_match.rule.response_type == "fixed": + # Fixed reply - skip LLM + ctx.llm_response = LLMResponse( + content=ctx.intent_match.rule.fixed_reply or "收到您的消息。", + model="intent_fixed", + usage={}, + finish_reason="intent_fixed", + ) + ctx.diagnostics["intent_handled"] = True + logger.info("[AC-AISVC-70] Intent fixed reply, skipping LLM") + + elif ctx.intent_match.rule.response_type == "rag": + # RAG with target KBs + ctx.target_kb_ids = ctx.intent_match.rule.target_kb_ids or [] + logger.info(f"[AC-AISVC-70] Intent RAG, target_kb_ids={ctx.target_kb_ids}") + + elif ctx.intent_match.rule.response_type == "flow": + # Start script 
flow + if ctx.intent_match.rule.flow_id and self._flow_engine: + async with get_session() as session: + flow_engine = FlowEngine(session) + instance, first_step = await flow_engine.start( + tenant_id=ctx.tenant_id, + session_id=ctx.session_id, + flow_id=ctx.intent_match.rule.flow_id, + ) + if first_step: + ctx.llm_response = LLMResponse( + content=first_step, + model="script_flow", + usage={}, + finish_reason="flow_start", + ) + ctx.diagnostics["intent_handled"] = True + logger.info("[AC-AISVC-70] Intent flow started, skipping LLM") + + elif ctx.intent_match.rule.response_type == "transfer": + # Transfer to human + ctx.llm_response = LLMResponse( + content=ctx.intent_match.rule.transfer_message or "正在为您转接人工客服...", + model="intent_transfer", + usage={}, + finish_reason="intent_transfer", + ) + ctx.confidence_result = ConfidenceResult( + confidence=0.0, + should_transfer=True, + transfer_reason="intent_rule_transfer", + is_retrieval_insufficient=False, + ) + ctx.diagnostics["intent_handled"] = True + logger.info("[AC-AISVC-70] Intent transfer, skipping LLM") + + else: + ctx.diagnostics["intent_match_enabled"] = True + ctx.diagnostics["intent_matched"] = False + + except Exception as e: + logger.warning(f"[AC-AISVC-69] Intent matching failed: {e}") + ctx.diagnostics["intent_match_error"] = str(e) + async def _merge_context( self, ctx: GenerationContext, @@ -263,8 +561,13 @@ class OrchestratorService: async def _retrieve_evidence(self, ctx: GenerationContext) -> None: """ [AC-AISVC-16, AC-AISVC-17] RAG retrieval for evidence. - Step 3 of the generation pipeline. + Step 5-6: Multi-KB retrieval with target KBs from intent matching. 
""" + # Skip if flow or intent already handled + if ctx.diagnostics.get("flow_handled") or ctx.diagnostics.get("intent_handled"): + logger.info("[AC-AISVC-16] Request already handled, skipping retrieval") + return + logger.info( f"[AC-AISVC-16] Starting retrieval: tenant={ctx.tenant_id}, " f"query={ctx.current_message[:100]}..., retriever={type(self._retriever).__name__ if self._retriever else 'None'}" @@ -278,6 +581,12 @@ class OrchestratorService: metadata=ctx.request_metadata, ) + # If intent matched with target KBs, pass them to retriever + if ctx.target_kb_ids: + retrieval_ctx.metadata = retrieval_ctx.metadata or {} + retrieval_ctx.metadata["target_kb_ids"] = ctx.target_kb_ids + logger.info(f"[AC-AISVC-16] Using target_kb_ids from intent: {ctx.target_kb_ids}") + ctx.retrieval_result = await self._retriever.retrieve(retrieval_ctx) ctx.diagnostics["retrieval"] = { @@ -292,7 +601,7 @@ class OrchestratorService: f"max_score={ctx.retrieval_result.max_score:.3f}, " f"is_empty={ctx.retrieval_result.is_empty}" ) - + if ctx.retrieval_result.hit_count > 0: for i, hit in enumerate(ctx.retrieval_result.hits[:3]): logger.info( @@ -308,11 +617,76 @@ class OrchestratorService: ) ctx.diagnostics["retrieval_error"] = str(e) + async def _build_system_prompt(self, ctx: GenerationContext) -> None: + """ + [AC-AISVC-56, AC-AISVC-84] Step 7: Build system prompt with template + behavior rules. 
+ """ + # Skip if flow or intent already handled + if ctx.diagnostics.get("flow_handled") or ctx.diagnostics.get("intent_handled"): + logger.info("[AC-AISVC-56] Request already handled, using default prompt") + ctx.system_prompt = self._config.system_prompt + return + + try: + # Try to load template from service + if self._prompt_template_service: + async with get_session() as session: + template_service = PromptTemplateService(session) + template_version = await template_service.get_published_template( + tenant_id=ctx.tenant_id, + scene="default", # TODO: Make scene configurable + ) + + if template_version: + # Resolve variables + variables = { + "persona_name": "AI助手", + "current_time": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), + "channel_type": ctx.channel_type, + } + ctx.system_prompt = self._variable_resolver.resolve( + template=template_version.system_instruction, + variables=variables, + ) + logger.info(f"[AC-AISVC-56] Loaded template: scene=default, version={template_version.version}") + else: + ctx.system_prompt = self._config.system_prompt + logger.info("[AC-AISVC-56] No published template found, using default") + else: + ctx.system_prompt = self._config.system_prompt + + # Load and inject behavior rules + if self._behavior_rule_service: + async with get_session() as session: + behavior_service = BehaviorRuleService(session) + rules = await behavior_service.get_enabled_rules(ctx.tenant_id) + + if rules: + ctx.behavior_rules = [rule.rule_text for rule in rules] + behavior_text = "\n".join([f"- {rule}" for rule in ctx.behavior_rules]) + ctx.system_prompt += f"\n\n行为约束:\n{behavior_text}" + logger.info(f"[AC-AISVC-84] Injected {len(rules)} behavior rules") + + ctx.diagnostics["prompt_template"] = { + "source": "template" if self._prompt_template_service else "default", + "behavior_rules_count": len(ctx.behavior_rules), + } + + except Exception as e: + logger.warning(f"[AC-AISVC-56] Failed to build system prompt: {e}") + ctx.system_prompt = 
self._config.system_prompt + ctx.diagnostics["prompt_build_error"] = str(e) + async def _generate_response(self, ctx: GenerationContext) -> None: """ [AC-AISVC-02] Generate response using LLM. - Step 4-5 of the generation pipeline. + Step 8 of the 12-step pipeline. """ + # Skip if flow or intent already handled + if ctx.diagnostics.get("flow_handled") or ctx.diagnostics.get("intent_handled"): + logger.info("[AC-AISVC-02] Request already handled, skipping LLM generation") + return + messages = self._build_llm_messages(ctx) logger.info( f"[AC-AISVC-02] Building LLM messages: count={len(messages)}, " @@ -368,13 +742,62 @@ class OrchestratorService: ctx.diagnostics["llm_mode"] = "fallback" ctx.diagnostics["fallback_reason"] = f"llm_error: {str(e)}" + async def _filter_output(self, ctx: GenerationContext) -> None: + """ + [AC-AISVC-82] Step 9: Filter forbidden words in output. + """ + if not ctx.llm_response: + logger.debug("[AC-AISVC-82] No LLM response to filter") + return + + if not self._output_filter: + logger.debug("[AC-AISVC-82] No output filter configured, skipping") + ctx.filtered_reply = ctx.llm_response.content + ctx.diagnostics["output_filter_enabled"] = False + return + + try: + filter_result = await self._output_filter.filter( + reply=ctx.llm_response.content, + tenant_id=ctx.tenant_id, + ) + + ctx.filtered_reply = filter_result.filtered_text + ctx.diagnostics["output_filter"] = { + "triggered": filter_result.triggered, + "matched_words": filter_result.matched_words, + "strategy_applied": filter_result.strategy_applied, + } + + if filter_result.triggered: + logger.info( + f"[AC-AISVC-82] Output filtered: words={filter_result.matched_words}, " + f"strategy={filter_result.strategy_applied}" + ) + + # If blocked, override confidence + if filter_result.strategy_applied == "block": + ctx.confidence_result = ConfidenceResult( + confidence=0.0, + should_transfer=True, + transfer_reason="output_blocked_by_guardrail", + is_retrieval_insufficient=False, + ) + + 
except Exception as e: + logger.warning(f"[AC-AISVC-82] Output filtering failed: {e}") + ctx.filtered_reply = ctx.llm_response.content + ctx.diagnostics["output_filter_error"] = str(e) + def _build_llm_messages(self, ctx: GenerationContext) -> list[dict[str, str]]: """ [AC-AISVC-02] Build messages for LLM including system prompt and evidence. + Uses ctx.system_prompt from Step 7 (template + behavior rules). """ messages = [] - system_content = self._config.system_prompt + # Use system prompt from Step 7 (template + behavior rules) + system_content = ctx.system_prompt or self._config.system_prompt if ctx.retrieval_result and not ctx.retrieval_result.is_empty: evidence_text = self._format_evidence(ctx.retrieval_result) @@ -386,20 +809,12 @@ class OrchestratorService: messages.extend(ctx.merged_context.messages) messages.append({"role": "user", "content": ctx.current_message}) - + logger.info( f"[AC-AISVC-02] Built {len(messages)} messages for LLM: " f"system_len={len(system_content)}, history_count={len(ctx.merged_context.messages) if ctx.merged_context else 0}" ) logger.debug(f"[AC-AISVC-02] System prompt preview: {system_content[:500]}...") - - logger.info(f"[AC-AISVC-02] ========== ORCHESTRATOR FULL PROMPT ==========") - for i, msg in enumerate(messages): - role = msg.get("role", "unknown") - content = msg.get("content", "") - logger.info(f"[AC-AISVC-02] [{i}] role={role}, content_length={len(content)}") - logger.info(f"[AC-AISVC-02] [{i}] content:\n{content}") - logger.info(f"[AC-AISVC-02] ==============================================") return messages @@ -459,7 +874,8 @@ class OrchestratorService: async def _save_messages(self, ctx: GenerationContext) -> None: """ [AC-AISVC-13] Save user and assistant messages to Memory. - Step 7 of the generation pipeline. + Step 11 of the 12-step pipeline. + Uses filtered_reply from Step 9. 
""" if not self._memory_service: logger.info("[AC-AISVC-13] No memory service configured, skipping save") @@ -477,7 +893,13 @@ class OrchestratorService: {"role": "user", "content": ctx.current_message}, ] - if ctx.llm_response: + # Use filtered_reply if available, otherwise use llm_response.content + if ctx.filtered_reply: + messages_to_save.append({ + "role": "assistant", + "content": ctx.filtered_reply, + }) + elif ctx.llm_response: messages_to_save.append({ "role": "assistant", "content": ctx.llm_response.content, @@ -503,9 +925,16 @@ class OrchestratorService: def _build_response(self, ctx: GenerationContext) -> ChatResponse: """ [AC-AISVC-02] Build final ChatResponse from generation context. - Step 8 of the generation pipeline. + Step 12 of the 12-step pipeline. + Uses filtered_reply from Step 9. """ - reply = ctx.llm_response.content if ctx.llm_response else self._fallback_response(ctx) + # Use filtered_reply if available, otherwise use llm_response.content + if ctx.filtered_reply: + reply = ctx.filtered_reply + elif ctx.llm_response: + reply = ctx.llm_response.content + else: + reply = self._fallback_response(ctx) confidence = ctx.confidence_result.confidence if ctx.confidence_result else 0.5 should_transfer = ctx.confidence_result.should_transfer if ctx.confidence_result else True @@ -534,6 +963,12 @@ class OrchestratorService: Generate a streaming response. [AC-AISVC-06, AC-AISVC-07, AC-AISVC-08] Yields SSE events in proper sequence. + 12-Step Pipeline (same as generate, but with streaming LLM output): + 1-7: Same as generate() up to PromptBuilder + 8: LLM.stream_generate (streaming) + 9: OutputFilter with streaming support + 10-12: Confidence, Memory, Return + SSE Event Sequence (per design.md Section 6.2): 1. message events (multiple) - each with incremental delta 2. final event (exactly 1) - with complete response @@ -545,7 +980,7 @@ class OrchestratorService: 3. 
connection close """ logger.info( - f"[AC-AISVC-06] Starting streaming generation for tenant={tenant_id}, " + f"[AC-AISVC-06] Starting 12-step streaming generation for tenant={tenant_id}, " f"session={request.session_id}" ) @@ -561,15 +996,33 @@ class OrchestratorService: ) try: + # Steps 1-7: Same as generate() + await self._scan_input(ctx) await self._load_local_history(ctx) await self._merge_context(ctx, request.history) + await self._check_active_flow(ctx) + await self._match_intent(ctx) if self._config.enable_rag and self._retriever: await self._retrieve_evidence(ctx) + await self._build_system_prompt(ctx) + + # Step 8: LLM streaming generation full_reply = "" - if self._llm_client: + # If flow or intent already handled, stream the pre-determined response + if ctx.diagnostics.get("flow_handled") or ctx.diagnostics.get("intent_handled"): + if ctx.llm_response: + # Stream the pre-determined response character by character + import asyncio + for char in ctx.llm_response.content: + if not state_machine.can_send_message(): + break + yield create_message_event(delta=char) + full_reply += char + await asyncio.sleep(0.01) + elif self._llm_client: async for event in self._stream_from_llm(ctx, state_machine): if event.event == "message": full_reply += self._extract_delta_from_event(event) @@ -588,13 +1041,20 @@ class OrchestratorService: finish_reason="stop", ) + # Step 9: OutputFilter (on complete reply) + await self._filter_output(ctx) + + # Step 10: Confidence self._calculate_confidence(ctx) + # Step 11: Memory await self._save_messages(ctx) + # Step 12: Return final event if await state_machine.transition_to_final(): + final_reply = ctx.filtered_reply or full_reply yield create_final_event( - reply=full_reply, + reply=final_reply, confidence=ctx.confidence_result.confidence if ctx.confidence_result else 0.5, should_transfer=ctx.confidence_result.should_transfer if ctx.confidence_result else False, transfer_reason=ctx.confidence_result.transfer_reason if 
ctx.confidence_result else None, diff --git a/ai-service/app/services/prompt/template_service.py b/ai-service/app/services/prompt/template_service.py index 4e21ff9..e9b3eda 100644 --- a/ai-service/app/services/prompt/template_service.py +++ b/ai-service/app/services/prompt/template_service.py @@ -6,22 +6,23 @@ Prompt template service for AI Service. import logging import time import uuid +from collections.abc import Sequence from datetime import datetime -from typing import Any, Sequence +from typing import Any -from sqlalchemy import select, func +from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession from sqlmodel import col +from app.core.prompts import SYSTEM_PROMPT from app.models.entities import ( PromptTemplate, - PromptTemplateVersion, PromptTemplateCreate, PromptTemplateUpdate, + PromptTemplateVersion, TemplateVersionStatus, ) from app.services.prompt.variable_resolver import VariableResolver -from app.core.prompts import SYSTEM_PROMPT logger = logging.getLogger(__name__) @@ -74,7 +75,7 @@ _template_cache = TemplateCache() class PromptTemplateService: """ [AC-AISVC-52~AC-AISVC-58] Service for managing prompt templates. 
- + Features: - Template CRUD with tenant isolation - Version management (auto-create new version on update) @@ -132,10 +133,10 @@ class PromptTemplateService: stmt = select(PromptTemplate).where( PromptTemplate.tenant_id == tenant_id ) - + if scene: stmt = stmt.where(PromptTemplate.scene == scene) - + stmt = stmt.order_by(col(PromptTemplate.created_at).desc()) result = await self._session.execute(stmt) return result.scalars().all() @@ -168,13 +169,13 @@ class PromptTemplateService: return None versions = await self._get_versions(template_id) - + current_version = None for v in versions: if v.status == TemplateVersionStatus.PUBLISHED.value: current_version = v break - + return { "id": str(template.id), "name": template.name, @@ -225,7 +226,7 @@ class PromptTemplateService: if update_data.system_instruction is not None: latest_version = await self._get_latest_version(template_id) new_version_num = (latest_version.version + 1) if latest_version else 1 - + new_version = PromptTemplateVersion( template_id=template_id, version=new_version_num, @@ -236,9 +237,9 @@ class PromptTemplateService: self._session.add(new_version) await self._session.flush() - + self._cache.invalidate(tenant_id, template.scene) - + logger.info( f"[AC-AISVC-53] Updated prompt template: tenant={tenant_id}, id={template_id}" ) @@ -259,26 +260,26 @@ class PromptTemplateService: return False versions = await self._get_versions(template_id) - + for v in versions: if v.status == TemplateVersionStatus.PUBLISHED.value: v.status = TemplateVersionStatus.ARCHIVED.value - + target_version = None for v in versions: if v.version == version: target_version = v break - + if not target_version: return False - + target_version.status = TemplateVersionStatus.PUBLISHED.value await self._session.flush() - + self._cache.invalidate(tenant_id, template.scene) self._cache.set(tenant_id, template.scene, target_version) - + logger.info( f"[AC-AISVC-54] Published template version: tenant={tenant_id}, " 
f"template_id={template_id}, version={version}" @@ -304,7 +305,7 @@ class PromptTemplateService: ) -> str: """ [AC-AISVC-51, AC-AISVC-56] Get the published template for a scene. - + Resolution order: 1. Check in-memory cache 2. Query database for published version @@ -402,12 +403,12 @@ class PromptTemplateService: versions = await self._get_versions(template_id) for v in versions: await self._session.delete(v) - + await self._session.delete(template) await self._session.flush() - + self._cache.invalidate(tenant_id, template.scene) - + logger.info( f"Deleted prompt template: tenant={tenant_id}, id={template_id}" ) diff --git a/ai-service/app/services/prompt/variable_resolver.py b/ai-service/app/services/prompt/variable_resolver.py index d59d480..59c8def 100644 --- a/ai-service/app/services/prompt/variable_resolver.py +++ b/ai-service/app/services/prompt/variable_resolver.py @@ -24,7 +24,7 @@ BUILTIN_VARIABLES = { class VariableResolver: """ [AC-AISVC-56] Variable replacement engine for prompt templates. - + Supports: - Built-in variables: persona_name, current_time, channel_type, tenant_name, session_id - Custom variables: defined in template with defaults @@ -50,17 +50,17 @@ class VariableResolver: ) -> str: """ Resolve all {{variable}} placeholders in the template. 
- + Args: template: Template string with {{variable}} placeholders variables: Custom variable definitions from template extra_context: Additional context for resolution - + Returns: Template with all variables replaced """ context = self._build_context(variables, extra_context) - + def replace_var(match: re.Match) -> str: var_name = match.group(1) if var_name in context: @@ -70,7 +70,7 @@ class VariableResolver: return str(value) logger.warning(f"Unknown variable in template: {var_name}") return match.group(0) - + resolved = VARIABLE_PATTERN.sub(replace_var, template) return resolved @@ -81,32 +81,32 @@ class VariableResolver: ) -> dict[str, Any]: """Build the complete context for variable resolution.""" context = {} - + for key, value in BUILTIN_VARIABLES.items(): if key in self._context: context[key] = self._context[key] else: context[key] = value - + if variables: for var in variables: name = var.get("name") default = var.get("default", "") if name: context[name] = default - + if extra_context: context.update(extra_context) - + return context def extract_variables(self, template: str) -> list[str]: """ Extract all variable names from a template. - + Args: template: Template string - + Returns: List of variable names found in the template """ @@ -119,24 +119,24 @@ class VariableResolver: ) -> dict[str, Any]: """ Validate that all variables in template are defined. 
- + Args: template: Template string defined_variables: Variables defined in template metadata - + Returns: Dict with 'valid' boolean and 'missing' list """ used_vars = set(self.extract_variables(template)) builtin_vars = set(BUILTIN_VARIABLES.keys()) - + defined_names = set() if defined_variables: defined_names = {v.get("name") for v in defined_variables if v.get("name")} - + available_vars = builtin_vars | defined_names missing = used_vars - available_vars - + return { "valid": len(missing) == 0, "missing": list(missing), diff --git a/ai-service/app/services/retrieval/__init__.py b/ai-service/app/services/retrieval/__init__.py index d6865d4..7fe6292 100644 --- a/ai-service/app/services/retrieval/__init__.py +++ b/ai-service/app/services/retrieval/__init__.py @@ -10,28 +10,28 @@ from app.services.retrieval.base import ( RetrievalHit, RetrievalResult, ) -from app.services.retrieval.vector_retriever import VectorRetriever, get_vector_retriever +from app.services.retrieval.indexer import ( + IndexingProgress, + IndexingResult, + KnowledgeIndexer, + get_knowledge_indexer, +) from app.services.retrieval.metadata import ( ChunkMetadata, ChunkMetadataModel, - MetadataFilter, KnowledgeChunk, + MetadataFilter, + RetrievalStrategy, RetrieveRequest, RetrieveResult, - RetrievalStrategy, ) from app.services.retrieval.optimized_retriever import ( OptimizedRetriever, - get_optimized_retriever, - TwoStageResult, RRFCombiner, + TwoStageResult, + get_optimized_retriever, ) -from app.services.retrieval.indexer import ( - KnowledgeIndexer, - get_knowledge_indexer, - IndexingProgress, - IndexingResult, -) +from app.services.retrieval.vector_retriever import VectorRetriever, get_vector_retriever __all__ = [ "BaseRetriever", @@ -41,6 +41,7 @@ __all__ = [ "VectorRetriever", "get_vector_retriever", "ChunkMetadata", + "ChunkMetadataModel", "MetadataFilter", "KnowledgeChunk", "RetrieveRequest", diff --git a/ai-service/app/services/retrieval/indexer.py 
b/ai-service/app/services/retrieval/indexer.py index d701c57..b3b981e 100644 --- a/ai-service/app/services/retrieval/indexer.py +++ b/ai-service/app/services/retrieval/indexer.py @@ -3,7 +3,6 @@ Knowledge base indexing service with optimized embedding. Reference: rag-optimization/spec.md Section 5.1 """ -import asyncio import logging import uuid from dataclasses import dataclass, field @@ -12,7 +11,7 @@ from typing import Any from app.core.config import get_settings from app.core.qdrant_client import QdrantClient, get_qdrant_client -from app.services.embedding.nomic_provider import NomicEmbeddingProvider, NomicEmbeddingResult +from app.services.embedding.nomic_provider import NomicEmbeddingProvider from app.services.retrieval.metadata import ChunkMetadata, KnowledgeChunk logger = logging.getLogger(__name__) @@ -27,13 +26,13 @@ class IndexingProgress: failed_chunks: int = 0 current_document: str = "" started_at: datetime = field(default_factory=datetime.utcnow) - + @property def progress_percent(self) -> int: if self.total_chunks == 0: return 0 return int((self.processed_chunks / self.total_chunks) * 100) - + @property def elapsed_seconds(self) -> float: return (datetime.utcnow() - self.started_at).total_seconds() @@ -53,14 +52,14 @@ class IndexingResult: class KnowledgeIndexer: """ Knowledge base indexer with optimized embedding. 
- + Features: - Task prefixes (search_document:) for document embedding - Multi-dimensional vectors (256/512/768) - Metadata support - Batch processing """ - + def __init__( self, qdrant_client: QdrantClient | None = None, @@ -75,12 +74,12 @@ class KnowledgeIndexer: self._chunk_overlap = chunk_overlap self._batch_size = batch_size self._progress: IndexingProgress | None = None - + async def _get_client(self) -> QdrantClient: if self._qdrant_client is None: self._qdrant_client = await get_qdrant_client() return self._qdrant_client - + async def _get_embedding_provider(self) -> NomicEmbeddingProvider: if self._embedding_provider is None: self._embedding_provider = NomicEmbeddingProvider( @@ -89,30 +88,30 @@ class KnowledgeIndexer: dimension=settings.qdrant_vector_size, ) return self._embedding_provider - + def chunk_text(self, text: str, metadata: ChunkMetadata | None = None) -> list[KnowledgeChunk]: """ Split text into chunks for indexing. Each line becomes a separate chunk for better retrieval granularity. - + Args: text: Full text to chunk metadata: Metadata to attach to each chunk - + Returns: List of KnowledgeChunk objects """ chunks = [] doc_id = str(uuid.uuid4()) - + lines = text.split('\n') - + for i, line in enumerate(lines): line = line.strip() - + if len(line) < 10: continue - + chunk = KnowledgeChunk( chunk_id=f"{doc_id}_{i}", document_id=doc_id, @@ -120,9 +119,9 @@ class KnowledgeIndexer: metadata=metadata or ChunkMetadata(), ) chunks.append(chunk) - + return chunks - + def chunk_text_by_lines( self, text: str, @@ -132,25 +131,25 @@ class KnowledgeIndexer: ) -> list[KnowledgeChunk]: """ Split text by lines, each line is a separate chunk. 
- + Args: text: Full text to chunk metadata: Metadata to attach to each chunk min_line_length: Minimum line length to be indexed merge_short_lines: Whether to merge consecutive short lines - + Returns: List of KnowledgeChunk objects """ chunks = [] doc_id = str(uuid.uuid4()) - + lines = text.split('\n') - + if merge_short_lines: merged_lines = [] current_line = "" - + for line in lines: line = line.strip() if not line: @@ -158,27 +157,27 @@ class KnowledgeIndexer: merged_lines.append(current_line) current_line = "" continue - + if current_line: current_line += " " + line else: current_line = line - + if len(current_line) >= min_line_length * 2: merged_lines.append(current_line) current_line = "" - + if current_line: merged_lines.append(current_line) - + lines = merged_lines - + for i, line in enumerate(lines): line = line.strip() - + if len(line) < min_line_length: continue - + chunk = KnowledgeChunk( chunk_id=f"{doc_id}_{i}", document_id=doc_id, @@ -186,9 +185,9 @@ class KnowledgeIndexer: metadata=metadata or ChunkMetadata(), ) chunks.append(chunk) - + return chunks - + async def index_document( self, tenant_id: str, @@ -198,40 +197,40 @@ class KnowledgeIndexer: ) -> IndexingResult: """ Index a single document with optimized embedding. 
- + Args: tenant_id: Tenant identifier document_id: Document identifier text: Document text content metadata: Optional metadata for the document - + Returns: IndexingResult with status and statistics """ start_time = datetime.utcnow() - + try: client = await self._get_client() provider = await self._get_embedding_provider() - + await client.ensure_collection_exists(tenant_id, use_multi_vector=True) - + chunks = self.chunk_text(text, metadata) - + self._progress = IndexingProgress( total_chunks=len(chunks), current_document=document_id, ) - + points = [] for i, chunk in enumerate(chunks): try: embedding_result = await provider.embed_document(chunk.content) - + chunk.embedding_full = embedding_result.embedding_full chunk.embedding_256 = embedding_result.embedding_256 chunk.embedding_512 = embedding_result.embedding_512 - + point = { "id": str(uuid.uuid4()), # Generate a valid UUID for Qdrant "vector": { @@ -248,27 +247,27 @@ class KnowledgeIndexer: } } points.append(point) - + self._progress.processed_chunks += 1 - + logger.debug( f"[RAG-OPT] Indexed chunk {i+1}/{len(chunks)} for doc={document_id}" ) - + except Exception as e: logger.warning(f"[RAG-OPT] Failed to index chunk {i}: {e}") self._progress.failed_chunks += 1 - + if points: await client.upsert_multi_vector(tenant_id, points) - + elapsed = (datetime.utcnow() - start_time).total_seconds() - + logger.info( f"[RAG-OPT] Indexed document {document_id}: " f"{len(points)} chunks in {elapsed:.2f}s" ) - + return IndexingResult( success=True, total_chunks=len(chunks), @@ -276,11 +275,11 @@ class KnowledgeIndexer: failed_chunks=self._progress.failed_chunks, elapsed_seconds=elapsed, ) - + except Exception as e: elapsed = (datetime.utcnow() - start_time).total_seconds() logger.error(f"[RAG-OPT] Failed to index document {document_id}: {e}") - + return IndexingResult( success=False, total_chunks=0, @@ -289,7 +288,7 @@ class KnowledgeIndexer: elapsed_seconds=elapsed, error_message=str(e), ) - + async def 
index_documents_batch( self, tenant_id: str, @@ -297,7 +296,7 @@ class KnowledgeIndexer: ) -> list[IndexingResult]: """ Index multiple documents in batch. - + Args: tenant_id: Tenant identifier documents: List of documents with format: @@ -306,12 +305,12 @@ class KnowledgeIndexer: "text": str, "metadata": ChunkMetadata (optional) } - + Returns: List of IndexingResult for each document """ results = [] - + for doc in documents: result = await self.index_document( tenant_id=tenant_id, @@ -320,9 +319,9 @@ class KnowledgeIndexer: metadata=doc.get("metadata"), ) results.append(result) - + return results - + def get_progress(self) -> IndexingProgress | None: """Get current indexing progress.""" return self._progress diff --git a/ai-service/app/services/retrieval/metadata.py b/ai-service/app/services/retrieval/metadata.py index 3dbe753..4374309 100644 --- a/ai-service/app/services/retrieval/metadata.py +++ b/ai-service/app/services/retrieval/metadata.py @@ -50,7 +50,7 @@ class ChunkMetadata: valid_until: date | None = None priority: int = 5 keywords: list[str] = field(default_factory=list) - + def to_dict(self) -> dict[str, Any]: """Convert to dictionary for storage.""" return { @@ -65,7 +65,7 @@ class ChunkMetadata: "priority": self.priority, "keywords": self.keywords, } - + @classmethod def from_dict(cls, data: dict[str, Any]) -> "ChunkMetadata": """Create from dictionary.""" @@ -95,29 +95,29 @@ class MetadataFilter: valid_only: bool = True min_priority: int | None = None keywords: list[str] | None = None - + def to_qdrant_filter(self) -> dict[str, Any] | None: """Convert to Qdrant filter format.""" conditions = [] - + if self.categories: conditions.append({ "key": "metadata.category", "match": {"any": self.categories} }) - + if self.departments: conditions.append({ "key": "metadata.department", "match": {"any": self.departments} }) - + if self.target_audiences: conditions.append({ "key": "metadata.target_audience", "match": {"any": self.target_audiences} }) - + if 
self.valid_only: today = date.today().isoformat() conditions.append({ @@ -126,19 +126,19 @@ class MetadataFilter: {"key": "metadata.valid_until", "range": {"gte": today}} ] }) - + if self.min_priority is not None: conditions.append({ "key": "metadata.priority", "range": {"lte": self.min_priority} }) - + if not conditions: return None - + if len(conditions) == 1: return {"must": conditions} - + return {"must": conditions} @@ -157,7 +157,7 @@ class KnowledgeChunk: metadata: ChunkMetadata = field(default_factory=ChunkMetadata) created_at: datetime = field(default_factory=datetime.utcnow) updated_at: datetime = field(default_factory=datetime.utcnow) - + def to_qdrant_point(self, point_id: int | str) -> dict[str, Any]: """Convert to Qdrant point format.""" return { @@ -189,7 +189,7 @@ class RetrieveRequest: top_k: int = 10 filters: MetadataFilter | None = None strategy: RetrievalStrategy = RetrievalStrategy.HYBRID - + def __post_init__(self): if not self.query_with_prefix: self.query_with_prefix = f"search_query:{self.query}" diff --git a/ai-service/app/services/retrieval/optimized_retriever.py b/ai-service/app/services/retrieval/optimized_retriever.py index 2150552..b1c2a39 100644 --- a/ai-service/app/services/retrieval/optimized_retriever.py +++ b/ai-service/app/services/retrieval/optimized_retriever.py @@ -6,7 +6,7 @@ Reference: rag-optimization/spec.md Section 2.2, 2.4, 2.5 import asyncio import logging import re -from dataclasses import dataclass, field +from dataclasses import dataclass from typing import Any from app.core.config import get_settings @@ -19,10 +19,7 @@ from app.services.retrieval.base import ( RetrievalResult, ) from app.services.retrieval.metadata import ( - ChunkMetadata, - MetadataFilter, RetrieveResult, - RetrievalStrategy, ) logger = logging.getLogger(__name__) @@ -42,14 +39,14 @@ class RRFCombiner: """ Reciprocal Rank Fusion for combining multiple retrieval results. 
Reference: rag-optimization/spec.md Section 2.5 - + Formula: score = Σ(1 / (k + rank_i)) Default k = 60 """ - + def __init__(self, k: int = 60): self._k = k - + def combine( self, vector_results: list[dict[str, Any]], @@ -59,22 +56,22 @@ class RRFCombiner: ) -> list[dict[str, Any]]: """ Combine vector and BM25 results using RRF. - + Args: vector_results: Results from vector search bm25_results: Results from BM25 search vector_weight: Weight for vector results bm25_weight: Weight for BM25 results - + Returns: Combined and sorted results """ combined_scores: dict[str, dict[str, Any]] = {} - + for rank, result in enumerate(vector_results): chunk_id = result.get("chunk_id") or result.get("id", str(rank)) rrf_score = vector_weight / (self._k + rank + 1) - + if chunk_id not in combined_scores: combined_scores[chunk_id] = { "score": 0.0, @@ -91,13 +88,13 @@ class RRFCombiner: combined_scores[chunk_id]["vector_rank"] = rank if result.get("vector"): combined_scores[chunk_id]["vector"] = result.get("vector") - + combined_scores[chunk_id]["score"] += rrf_score - + for rank, result in enumerate(bm25_results): chunk_id = result.get("chunk_id") or result.get("id", str(rank)) rrf_score = bm25_weight / (self._k + rank + 1) - + if chunk_id not in combined_scores: combined_scores[chunk_id] = { "score": 0.0, @@ -112,15 +109,15 @@ class RRFCombiner: else: combined_scores[chunk_id]["bm25_score"] = result.get("score", 0.0) combined_scores[chunk_id]["bm25_rank"] = rank - + combined_scores[chunk_id]["score"] += rrf_score - + sorted_results = sorted( combined_scores.values(), key=lambda x: x["score"], reverse=True ) - + return sorted_results @@ -131,10 +128,10 @@ class OptimizedRetriever(BaseRetriever): - Two-stage retrieval (256 dim -> 768 dim) - RRF hybrid ranking (vector + BM25) - Metadata filtering - + Reference: rag-optimization/spec.md Section 2, 3, 4 """ - + def __init__( self, qdrant_client: QdrantClient | None = None, @@ -155,12 +152,12 @@ class OptimizedRetriever(BaseRetriever): 
self._hybrid_enabled = hybrid_enabled if hybrid_enabled is not None else settings.rag_hybrid_enabled self._rrf_k = rrf_k or settings.rag_rrf_k self._rrf_combiner = RRFCombiner(k=self._rrf_k) - + async def _get_client(self) -> QdrantClient: if self._qdrant_client is None: self._qdrant_client = await get_qdrant_client() return self._qdrant_client - + async def _get_embedding_provider(self) -> NomicEmbeddingProvider: from app.services.embedding.factory import get_embedding_config_manager manager = get_embedding_config_manager() @@ -173,11 +170,11 @@ class OptimizedRetriever(BaseRetriever): model=settings.ollama_embedding_model, dimension=settings.qdrant_vector_size, ) - + async def retrieve(self, ctx: RetrievalContext) -> RetrievalResult: """ Retrieve documents using optimized strategy. - + Strategy selection: 1. If two_stage_enabled: use two-stage retrieval 2. If hybrid_enabled: use RRF hybrid ranking @@ -191,17 +188,17 @@ class OptimizedRetriever(BaseRetriever): f"[RAG-OPT] Retrieval config: top_k={self._top_k}, " f"score_threshold={self._score_threshold}, min_hits={self._min_hits}" ) - + try: provider = await self._get_embedding_provider() logger.info(f"[RAG-OPT] Using embedding provider: {type(provider).__name__}") - + embedding_result = await provider.embed_query(ctx.query) logger.info( f"[RAG-OPT] Embedding generated: full_dim={len(embedding_result.embedding_full)}, " f"dim_256={'available' if embedding_result.embedding_256 else 'not available'}" ) - + if self._two_stage_enabled and self._hybrid_enabled: logger.info("[RAG-OPT] Using two-stage + hybrid retrieval strategy") results = await self._two_stage_hybrid_retrieve( @@ -232,9 +229,9 @@ class OptimizedRetriever(BaseRetriever): embedding_result.embedding_full, self._top_k, ) - + logger.info(f"[RAG-OPT] Raw results count: {len(results)}") - + retrieval_hits = [ RetrievalHit( text=result.get("payload", {}).get("text", ""), @@ -245,15 +242,15 @@ class OptimizedRetriever(BaseRetriever): for result in results if 
result.get("score", 0.0) >= self._score_threshold ] - + filtered_count = len(results) - len(retrieval_hits) if filtered_count > 0: logger.info( f"[RAG-OPT] Filtered out {filtered_count} results below threshold {self._score_threshold}" ) - + is_insufficient = len(retrieval_hits) < self._min_hits - + diagnostics = { "query_length": len(ctx.query), "top_k": self._top_k, @@ -266,30 +263,30 @@ class OptimizedRetriever(BaseRetriever): "raw_results_count": len(results), "filtered_below_threshold": filtered_count, } - + logger.info( f"[RAG-OPT] Retrieval complete: {len(retrieval_hits)} hits, " f"insufficient={is_insufficient}, max_score={diagnostics['max_score']:.3f}" ) - + if len(retrieval_hits) == 0: logger.warning( f"[RAG-OPT] No hits found! tenant={ctx.tenant_id}, query={ctx.query[:50]}..., " f"raw_results={len(results)}, threshold={self._score_threshold}" ) - + return RetrievalResult( hits=retrieval_hits, diagnostics=diagnostics, ) - + except Exception as e: logger.error(f"[RAG-OPT] Retrieval error: {e}", exc_info=True) return RetrievalResult( hits=[], diagnostics={"error": str(e), "is_insufficient": True}, ) - + async def _two_stage_retrieve( self, tenant_id: str, @@ -298,16 +295,16 @@ class OptimizedRetriever(BaseRetriever): ) -> list[dict[str, Any]]: """ Two-stage retrieval using Matryoshka dimensions. 
- + Stage 1: Fast retrieval with 256-dim vectors Stage 2: Precise reranking with 768-dim vectors - + Reference: rag-optimization/spec.md Section 2.4 """ import time - + client = await self._get_client() - + stage1_start = time.perf_counter() candidates = await self._search_with_dimension( client, tenant_id, embedding_result.embedding_256, "dim_256", @@ -315,22 +312,22 @@ class OptimizedRetriever(BaseRetriever): with_vectors=True, ) stage1_latency = (time.perf_counter() - stage1_start) * 1000 - + logger.info( f"[RAG-OPT] Stage 1: {len(candidates)} candidates in {stage1_latency:.2f}ms" ) - + stage2_start = time.perf_counter() reranked = [] for candidate in candidates: vector_data = candidate.get("vector", {}) stored_full_embedding = None - + if isinstance(vector_data, dict): stored_full_embedding = vector_data.get("full", []) elif isinstance(vector_data, list): stored_full_embedding = vector_data - + if stored_full_embedding and len(stored_full_embedding) > 0: similarity = self._cosine_similarity( embedding_result.embedding_full, @@ -339,17 +336,17 @@ class OptimizedRetriever(BaseRetriever): candidate["score"] = similarity candidate["stage"] = "reranked" reranked.append(candidate) - + reranked.sort(key=lambda x: x.get("score", 0), reverse=True) results = reranked[:top_k] stage2_latency = (time.perf_counter() - stage2_start) * 1000 - + logger.info( f"[RAG-OPT] Stage 2: {len(results)} final results in {stage2_latency:.2f}ms" ) - + return results - + async def _hybrid_retrieve( self, tenant_id: str, @@ -359,39 +356,39 @@ class OptimizedRetriever(BaseRetriever): ) -> list[dict[str, Any]]: """ Hybrid retrieval using RRF to combine vector and BM25 results. 
- + Reference: rag-optimization/spec.md Section 2.5 """ client = await self._get_client() - + vector_task = self._search_with_dimension( client, tenant_id, embedding_result.embedding_full, "full", top_k * 2 ) - + bm25_task = self._bm25_search(client, tenant_id, query, top_k * 2) - + vector_results, bm25_results = await asyncio.gather( vector_task, bm25_task, return_exceptions=True ) - + if isinstance(vector_results, Exception): logger.warning(f"[RAG-OPT] Vector search failed: {vector_results}") vector_results = [] - + if isinstance(bm25_results, Exception): logger.warning(f"[RAG-OPT] BM25 search failed: {bm25_results}") bm25_results = [] - + combined = self._rrf_combiner.combine( vector_results, bm25_results, vector_weight=settings.rag_vector_weight, bm25_weight=settings.rag_bm25_weight, ) - + return combined[:top_k] - + async def _two_stage_hybrid_retrieve( self, tenant_id: str, @@ -401,64 +398,64 @@ class OptimizedRetriever(BaseRetriever): ) -> list[dict[str, Any]]: """ Two-stage + Hybrid retrieval strategy. 
- + Stage 1: Fast retrieval with 256-dim vectors + BM25 in parallel Stage 2: RRF fusion + Precise reranking with 768-dim vectors - + This combines the best of both worlds: - Two-stage: Speed from 256-dim, precision from 768-dim reranking - Hybrid: Semantic matching from vectors, keyword matching from BM25 """ import time - + client = await self._get_client() - + stage1_start = time.perf_counter() - + vector_task = self._search_with_dimension( client, tenant_id, embedding_result.embedding_256, "dim_256", top_k * self._two_stage_expand_factor, with_vectors=True, ) - + bm25_task = self._bm25_search(client, tenant_id, query, top_k * self._two_stage_expand_factor) - + vector_results, bm25_results = await asyncio.gather( vector_task, bm25_task, return_exceptions=True ) - + if isinstance(vector_results, Exception): logger.warning(f"[RAG-OPT] Vector search failed: {vector_results}") vector_results = [] - + if isinstance(bm25_results, Exception): logger.warning(f"[RAG-OPT] BM25 search failed: {bm25_results}") bm25_results = [] - + stage1_latency = (time.perf_counter() - stage1_start) * 1000 logger.info( f"[RAG-OPT] Two-stage Hybrid Stage 1: vector={len(vector_results)}, bm25={len(bm25_results)}, latency={stage1_latency:.2f}ms" ) - + stage2_start = time.perf_counter() - + combined = self._rrf_combiner.combine( vector_results, bm25_results, vector_weight=settings.rag_vector_weight, bm25_weight=settings.rag_bm25_weight, ) - + reranked = [] for candidate in combined[:top_k * 2]: vector_data = candidate.get("vector", {}) stored_full_embedding = None - + if isinstance(vector_data, dict): stored_full_embedding = vector_data.get("full", []) elif isinstance(vector_data, list): stored_full_embedding = vector_data - + if stored_full_embedding and len(stored_full_embedding) > 0: similarity = self._cosine_similarity( embedding_result.embedding_full, @@ -467,17 +464,17 @@ class OptimizedRetriever(BaseRetriever): candidate["score"] = similarity candidate["stage"] = 
"two_stage_hybrid_reranked" reranked.append(candidate) - + reranked.sort(key=lambda x: x.get("score", 0), reverse=True) results = reranked[:top_k] stage2_latency = (time.perf_counter() - stage2_start) * 1000 - + logger.info( f"[RAG-OPT] Two-stage Hybrid Stage 2 (reranking): {len(results)} final results in {stage2_latency:.2f}ms" ) - + return results - + async def _vector_retrieve( self, tenant_id: str, @@ -489,7 +486,7 @@ class OptimizedRetriever(BaseRetriever): return await self._search_with_dimension( client, tenant_id, embedding, "full", top_k ) - + async def _search_with_dimension( self, client: QdrantClient, @@ -505,7 +502,7 @@ class OptimizedRetriever(BaseRetriever): f"[RAG-OPT] Searching with vector_name={vector_name}, " f"limit={limit}, vector_dim={len(query_vector)}, with_vectors={with_vectors}" ) - + results = await client.search( tenant_id=tenant_id, query_vector=query_vector, @@ -513,17 +510,17 @@ class OptimizedRetriever(BaseRetriever): vector_name=vector_name, with_vectors=with_vectors, ) - + logger.info( f"[RAG-OPT] Search returned {len(results)} results" ) - + if len(results) > 0: for i, r in enumerate(results[:3]): logger.debug( f"[RAG-OPT] Result {i+1}: id={r['id']}, score={r['score']:.4f}" ) - + return results except Exception as e: logger.error( @@ -531,7 +528,7 @@ class OptimizedRetriever(BaseRetriever): exc_info=True ) return [] - + async def _bm25_search( self, client: QdrantClient, @@ -546,15 +543,15 @@ class OptimizedRetriever(BaseRetriever): try: qdrant = await client.get_client() collection_name = client.get_collection_name(tenant_id) - + query_terms = set(re.findall(r'\w+', query.lower())) - + results = await qdrant.scroll( collection_name=collection_name, limit=limit * 3, with_payload=True, ) - + scored_results = [] for point in results[0]: text = point.payload.get("text", "").lower() @@ -567,21 +564,21 @@ class OptimizedRetriever(BaseRetriever): "score": score, "payload": point.payload or {}, }) - + scored_results.sort(key=lambda x: 
x["score"], reverse=True) return scored_results[:limit] - + except Exception as e: logger.debug(f"[RAG-OPT] BM25 search failed: {e}") return [] - + def _cosine_similarity(self, vec1: list[float], vec2: list[float]) -> float: """Calculate cosine similarity between two vectors.""" import numpy as np a = np.array(vec1) b = np.array(vec2) return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))) - + async def health_check(self) -> bool: """Check if retriever is healthy.""" try: diff --git a/ai-service/app/services/retrieval/vector_retriever.py b/ai-service/app/services/retrieval/vector_retriever.py index eba3fa0..ad10833 100644 --- a/ai-service/app/services/retrieval/vector_retriever.py +++ b/ai-service/app/services/retrieval/vector_retriever.py @@ -4,7 +4,6 @@ Vector retriever for AI Service. """ import logging -from typing import Any from app.core.config import get_settings from app.core.qdrant_client import QdrantClient, get_qdrant_client @@ -84,7 +83,7 @@ class VectorRetriever(BaseRetriever): limit=self._top_k, score_threshold=self._score_threshold, ) - + logger.info(f"[AC-AISVC-16] Search returned {len(hits)} raw hits") retrieval_hits = [ @@ -115,7 +114,7 @@ class VectorRetriever(BaseRetriever): f"[AC-AISVC-17] Retrieval complete: {len(retrieval_hits)} hits, " f"insufficient={is_insufficient}, max_score={diagnostics['max_score']:.3f}" ) - + if len(retrieval_hits) == 0: logger.warning( f"[AC-AISVC-17] No hits found! tenant={ctx.tenant_id}, " @@ -140,7 +139,7 @@ class VectorRetriever(BaseRetriever): [AC-AISVC-29] Uses configured embedding provider. 
""" from app.services.embedding import get_embedding_provider - + provider = await get_embedding_provider() return await provider.embed(text) diff --git a/ai-service/docs/progress/phase11_multi_kb_progress.md b/ai-service/docs/progress/phase11_multi_kb_progress.md new file mode 100644 index 0000000..83721ed --- /dev/null +++ b/ai-service/docs/progress/phase11_multi_kb_progress.md @@ -0,0 +1,178 @@ +# Phase 11 多知识库管理 - 实现进度报告 + +**日期**: 2026-02-27 +**版本**: v0.6.0 +**状态**: T11.1~T11.5 已完成 + +--- + +## 1. 任务完成情况 + +| 任务ID | 描述 | 状态 | 验收标准 | +|--------|------|------|----------| +| T11.1 | 扩展 KnowledgeBase 实体 | ✅ 完成 | AC-AISVC-59 | +| T11.2 | 实现知识库 CRUD 服务 | ✅ 完成 | AC-AISVC-59, AC-AISVC-61, AC-AISVC-62 | +| T11.3 | 实现知识库管理 API | ✅ 完成 | AC-AISVC-59, AC-AISVC-60, AC-AISVC-61, AC-AISVC-62 | +| T11.4 | 升级 Qdrant Collection 命名 | ✅ 完成 | AC-AISVC-63 | +| T11.5 | 修改文档上传流程 | ✅ 完成 | AC-AISVC-63 | +| T11.6 | 修改 OptimizedRetriever | ⏳ 待处理 | AC-AISVC-64 | +| T11.7 | kb_default 迁移 | ⏳ 待处理 | AC-AISVC-59 | +| T11.8 | 单元测试 | ⏳ 待处理 | AC-AISVC-59~AC-AISVC-64 | + +--- + +## 2. 
实现详情 + +### 2.1 T11.1 - 扩展 KnowledgeBase 实体 + +**文件**: `app/models/entities.py` + +新增字段: +- `kb_type`: 知识库类型 (product/faq/script/policy/general) +- `priority`: 优先级权重 (数值越大越优先) +- `is_enabled`: 是否启用 +- `doc_count`: 文档数量 (冗余统计) + +新增枚举: +```python +class KBType(str, Enum): + PRODUCT = "product" + FAQ = "faq" + SCRIPT = "script" + POLICY = "policy" + GENERAL = "general" +``` + +新增 Schema: +- `KnowledgeBaseCreate`: 创建知识库请求 +- `KnowledgeBaseUpdate`: 更新知识库请求 + +--- + +### 2.2 T11.2 - 知识库 CRUD 服务 + +**文件**: `app/services/knowledge_base_service.py` (新建) + +核心方法: +- `create_knowledge_base()`: 创建知识库并初始化 Qdrant Collection +- `get_knowledge_base()`: 获取单个知识库 +- `list_knowledge_bases()`: 列表查询 (支持按类型和状态过滤) +- `update_knowledge_base()`: 更新知识库 +- `delete_knowledge_base()`: 删除知识库及关联文档和 Collection +- `update_doc_count()`: 更新文档计数 +- `get_or_create_default_kb()`: 获取或创建默认知识库 + +--- + +### 2.3 T11.3 - 知识库管理 API + +**文件**: `app/api/admin/kb.py` + +新增端点: + +| 方法 | 路径 | 描述 | +|------|------|------| +| GET | /admin/kb/knowledge-bases | 查询知识库列表 | +| POST | /admin/kb/knowledge-bases | 创建知识库 | +| GET | /admin/kb/knowledge-bases/{kb_id} | 获取知识库详情 | +| PUT | /admin/kb/knowledge-bases/{kb_id} | 更新知识库 | +| DELETE | /admin/kb/knowledge-bases/{kb_id} | 删除知识库 | + +--- + +### 2.4 T11.4 - Qdrant Collection 命名升级 + +**文件**: `app/core/qdrant_client.py` + +命名规则: +- 旧格式 (兼容): `kb_{tenantId}` +- 新格式: `kb_{tenantId}_{kbId}` + +新增方法: +- `get_kb_collection_name()`: 获取知识库 Collection 名称 +- `ensure_kb_collection_exists()`: 确保知识库 Collection 存在 +- `delete_kb_collection()`: 删除知识库 Collection +- `search_kb()`: 多知识库检索 + +更新方法: +- `upsert_vectors()`: 新增 `kb_id` 参数 +- `upsert_multi_vector()`: 新增 `kb_id` 参数 + +--- + +### 2.5 T11.5 - 文档上传流程改造 + +**文件**: `app/api/admin/kb.py` + +变更: +1. `upload_document()` 端点支持指定 `kb_id` 参数 +2. 如果知识库不存在,自动使用默认知识库 +3. 上传后更新知识库文档计数 +4. `_index_document()` 函数新增 `kb_id` 参数 +5. 索引时使用 `ensure_kb_collection_exists()` 创建 Collection +6. 向量 payload 中新增 `kb_id` 字段 + +--- + +## 3. 
数据库迁移 + +需要执行以下 SQL 来更新现有表结构: + +```sql +-- 添加新字段到 knowledge_bases 表 +ALTER TABLE knowledge_bases ADD COLUMN IF NOT EXISTS kb_type VARCHAR DEFAULT 'general'; +ALTER TABLE knowledge_bases ADD COLUMN IF NOT EXISTS priority INTEGER DEFAULT 0; +ALTER TABLE knowledge_bases ADD COLUMN IF NOT EXISTS is_enabled BOOLEAN DEFAULT TRUE; +ALTER TABLE knowledge_bases ADD COLUMN IF NOT EXISTS doc_count INTEGER DEFAULT 0; + +-- 创建索引 +CREATE INDEX IF NOT EXISTS ix_knowledge_bases_tenant_kb_type ON knowledge_bases (tenant_id, kb_type); +``` + +--- + +## 4. 待完成任务 + +### T11.6 - 修改 OptimizedRetriever +- 支持 `target_kb_ids` 参数 +- 实现多 Collection 并行检索 +- 按知识库优先级排序结果 + +### T11.7 - kb_default 迁移 +- 首次启动时为现有数据创建默认知识库记录 +- 将现有 `kb_{tenantId}` Collection 映射为默认知识库 + +### T11.8 - 单元测试 +- 知识库 CRUD 服务测试 +- 多知识库检索测试 +- Collection 命名兼容性测试 + +--- + +## 5. 验证清单 + +- [x] KnowledgeBase 实体包含新字段 +- [x] 知识库创建时自动创建 Qdrant Collection +- [x] 知识库删除时自动删除 Qdrant Collection +- [x] 文档上传支持指定 kb_id +- [x] 向量索引到正确的 Collection +- [x] Collection 命名兼容旧格式 +- [ ] 多知识库并行检索 (T11.6) +- [ ] kb_default 自动迁移 (T11.7) +- [ ] 单元测试通过 (T11.8) + +--- + +## 6. 
相关文件 + +### 新建文件 +- `app/services/knowledge_base_service.py` + +### 修改文件 +- `app/models/entities.py` +- `app/core/qdrant_client.py` +- `app/api/admin/kb.py` + +--- + +**下一步**: 执行 T11.6~T11.8 或进入 Phase 12 (意图识别与规则引擎) diff --git a/ai-service/exports/conversations_default@ash@2026_97d97b90-a146-4141-9194-f24d7efcae0e.json b/ai-service/exports/conversations_default@ash@2026_97d97b90-a146-4141-9194-f24d7efcae0e.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/ai-service/exports/conversations_default@ash@2026_97d97b90-a146-4141-9194-f24d7efcae0e.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/ai-service/pyproject.toml b/ai-service/pyproject.toml index c497c23..28a0101 100644 --- a/ai-service/pyproject.toml +++ b/ai-service/pyproject.toml @@ -21,6 +21,7 @@ dependencies = [ "pymupdf>=1.23.0", "pdfplumber>=0.10.0", "python-multipart>=0.0.6", + "redis>=5.0.0", ] [project.optional-dependencies] diff --git a/ai-service/scripts/migrations/002_add_monitoring_fields.sql b/ai-service/scripts/migrations/002_add_monitoring_fields.sql new file mode 100644 index 0000000..c1cf622 --- /dev/null +++ b/ai-service/scripts/migrations/002_add_monitoring_fields.sql @@ -0,0 +1,74 @@ +-- Migration: Add monitoring fields to chat_messages and create new monitoring tables +-- Date: 2026-02-27 +-- Issue: [AC-AISVC-91~AC-AISVC-95, AC-AISVC-108~AC-AISVC-110] Dashboard and monitoring enhancement + +-- Add monitoring fields to chat_messages +ALTER TABLE chat_messages +ADD COLUMN IF NOT EXISTS prompt_template_id UUID REFERENCES prompt_templates(id); + +ALTER TABLE chat_messages +ADD COLUMN IF NOT EXISTS intent_rule_id UUID REFERENCES intent_rules(id); + +ALTER TABLE chat_messages +ADD COLUMN IF NOT EXISTS flow_instance_id UUID REFERENCES flow_instances(id); + +ALTER TABLE chat_messages +ADD COLUMN IF NOT EXISTS guardrail_triggered BOOLEAN DEFAULT FALSE; + +ALTER TABLE chat_messages +ADD COLUMN IF NOT EXISTS guardrail_words JSONB; + +-- Add indexes for monitoring 
queries +CREATE INDEX IF NOT EXISTS ix_chat_messages_tenant_template +ON chat_messages (tenant_id, prompt_template_id); + +CREATE INDEX IF NOT EXISTS ix_chat_messages_tenant_intent +ON chat_messages (tenant_id, intent_rule_id); + +CREATE INDEX IF NOT EXISTS ix_chat_messages_tenant_flow +ON chat_messages (tenant_id, flow_instance_id); + +CREATE INDEX IF NOT EXISTS ix_chat_messages_guardrail +ON chat_messages (tenant_id, guardrail_triggered) +WHERE guardrail_triggered = TRUE; + +-- Create flow_test_records table +CREATE TABLE IF NOT EXISTS flow_test_records ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + tenant_id VARCHAR NOT NULL, + session_id VARCHAR NOT NULL, + status VARCHAR DEFAULT 'success', + steps JSONB NOT NULL DEFAULT '[]', + final_response JSONB, + total_duration_ms INTEGER, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); + +CREATE INDEX IF NOT EXISTS ix_flow_test_records_tenant_created +ON flow_test_records (tenant_id, created_at); + +CREATE INDEX IF NOT EXISTS ix_flow_test_records_session +ON flow_test_records (session_id); + +-- Create export_tasks table +CREATE TABLE IF NOT EXISTS export_tasks ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + tenant_id VARCHAR NOT NULL, + status VARCHAR DEFAULT 'processing', + file_path VARCHAR, + file_name VARCHAR, + file_size INTEGER, + total_rows INTEGER, + format VARCHAR DEFAULT 'json', + filters JSONB, + error_message TEXT, + expires_at TIMESTAMP, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + completed_at TIMESTAMP +); + +CREATE INDEX IF NOT EXISTS ix_export_tasks_tenant_status +ON export_tasks (tenant_id, status); + +CREATE INDEX IF NOT EXISTS ix_export_tasks_tenant_created +ON export_tasks (tenant_id, created_at); diff --git a/ai-service/scripts/migrations/add_chat_message_fields.py b/ai-service/scripts/migrations/add_chat_message_fields.py new file mode 100644 index 0000000..8494777 --- /dev/null +++ b/ai-service/scripts/migrations/add_chat_message_fields.py @@ -0,0 +1,57 @@ +""" +Migration 
script to add monitoring fields to chat_messages table. +Run: python scripts/migrations/add_chat_message_fields.py +""" + +import asyncio +import sys +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).parent.parent.parent)) + +from sqlalchemy import text +from sqlalchemy.ext.asyncio import create_async_engine +from sqlalchemy.orm import sessionmaker +from sqlmodel.ext.asyncio.session import AsyncSession + +from app.core.config import get_settings + + +async def run_migration(): + """Run the migration to add new columns to chat_messages table.""" + settings = get_settings() + engine = create_async_engine(settings.database_url, echo=True) + async_session_maker = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False) + + statements = [ + "ALTER TABLE chat_messages ADD COLUMN IF NOT EXISTS prompt_template_id UUID", + "ALTER TABLE chat_messages ADD COLUMN IF NOT EXISTS intent_rule_id UUID", + "ALTER TABLE chat_messages ADD COLUMN IF NOT EXISTS flow_instance_id UUID", + "ALTER TABLE chat_messages ADD COLUMN IF NOT EXISTS guardrail_triggered BOOLEAN DEFAULT FALSE", + "ALTER TABLE chat_messages ADD COLUMN IF NOT EXISTS guardrail_words JSONB", + "CREATE INDEX IF NOT EXISTS ix_chat_messages_tenant_template ON chat_messages(tenant_id, prompt_template_id)", + "CREATE INDEX IF NOT EXISTS ix_chat_messages_tenant_intent ON chat_messages(tenant_id, intent_rule_id)", + "CREATE INDEX IF NOT EXISTS ix_chat_messages_tenant_flow ON chat_messages(tenant_id, flow_instance_id)", + ] + + async with async_session_maker() as session: + for statement in statements: + try: + await session.execute(text(statement)) + print(f"Executed: {statement[:60]}...") + except Exception as e: + error_str = str(e).lower() + if "already exists" in error_str or "duplicate" in error_str: + print(f"Skipped (already exists): {statement[:60]}...") + else: + print(f"Error: {e}") + raise + + await session.commit() + print("\nMigration completed successfully!") + + await engine.dispose() + 
+ +if __name__ == "__main__": + asyncio.run(run_migration()) diff --git a/docs/intent-rule-usage-appendix.md b/docs/intent-rule-usage-appendix.md new file mode 100644 index 0000000..c156677 --- /dev/null +++ b/docs/intent-rule-usage-appendix.md @@ -0,0 +1,115 @@ +## 继续 - 意图规则使用指南补充内容 + +### 4.2 创建意图规则 - 响应示例 + +```json +{ + "id": "550e8400-e29b-41d4-a716-446655440000", + "name": "转人工意图", + "keywords": ["人工", "客服", "投诉", "找人"], + "patterns": ["转.*人工", "找.*客服", "我要.*投诉"], + "priority": 200, + "response_type": "transfer", + "target_kb_ids": [], + "flow_id": null, + "fixed_reply": null, + "transfer_message": "正在为您转接人工客服,请稍候...", + "is_enabled": true, + "hit_count": 0, + "created_at": "2026-02-27T12:00:00Z", + "updated_at": "2026-02-27T12:00:00Z" +} +``` + +### 4.3 查询意图规则列表 + +**接口**: `GET /admin/intent-rules` + +**查询参数**: +- `response_type` (可选): 按响应类型筛选 +- `is_enabled` (可选): 按启用状态筛选 + +**请求示例**: +```bash +curl -X GET http://ai-service:8080/admin/intent-rules \ + -H "X-API-Key: your_api_key" \ + -H "X-Tenant-Id: szmp@ash@2026" +``` + +### 4.4 更新意图规则 + +**接口**: `PUT /admin/intent-rules/{rule_id}` + +### 4.5 删除意图规则 + +**接口**: `DELETE /admin/intent-rules/{rule_id}` + +--- + +## 5. 实际使用场景 + +### 5.1 常见问题快速回复 + +使用 fixed 类型直接返回预设回复,跳过 LLM 调用,响应速度快且成本低。 + +### 5.2 专业领域定向检索 + +使用 rag 类型将问题路由到特定知识库,提高检索精准度。 + +### 5.3 复杂流程引导 + +使用 flow 类型启动多轮对话流程,自动收集用户信息。 + +### 5.4 敏感问题转人工 + +使用 transfer 类型直接转接人工客服。 + +--- + +## 6. 缓存机制 + +- **缓存位置**: 内存缓存 +- **缓存键**: tenant_id +- **TTL**: 60 秒 +- **失效时机**: 创建/更新/删除规则时立即清除 + +--- + +## 7. 最佳实践 + +### 7.1 关键词设计 + +- 长度: 2-6 个字 +- 使用完整词组 +- 避免太短或太长 + +### 7.2 优先级设计 + +- 200+: 紧急/敏感问题 +- 100-199: 重要业务流程 +- 50-99: 常规咨询 +- 0-49: 通用兜底规则 + +--- + +## 8. 故障排查 + +### 8.1 规则未生效 + +检查: 规则是否启用、缓存是否过期、优先级是否正确 + +### 8.2 误匹配问题 + +优化关键词和正则表达式,调整优先级 + +--- + +## 9. 
总结 + +意图规则提供智能路由功能,支持四种响应类型,具有高性能缓存和租户隔离特性。 + +--- + +**文档版本**: v1.0 +**生成时间**: 2026-02-27 +**维护状态**: ✅ 活跃维护 diff --git a/docs/intent-rule-usage-final.txt b/docs/intent-rule-usage-final.txt new file mode 100644 index 0000000..24122b7 --- /dev/null +++ b/docs/intent-rule-usage-final.txt @@ -0,0 +1,38 @@ + + +--- + +## 5. 总结 + +意图规则(Intent Rule)是 AI 中台的智能路由系统,在 12 步生成流程的第 3 步执行。 + +### 5.1 核心流程 + +创建规则 → 设置关键词/正则 → 配置响应类型 → 启用规则 → 用户对话 → 意图匹配 → 路由处理 + +### 5.2 关键特性 + +- **智能路由**: 根据用户意图自动选择最佳处理方式 +- **优先级控制**: 灵活的优先级机制避免冲突 +- **四种响应**: 固定回复、RAG 检索、话术流程、转人工 +- **高性能**: 60 秒缓存 + 优化的匹配算法 +- **租户隔离**: 多租户数据完全独立 +- **命中统计**: 自动记录规则使用情况 + +### 5.3 最佳实践 + +1. **关键词设计**: 2-6 个字,使用完整词组 +2. **正则表达式**: 简单明了,避免过于复杂 +3. **优先级分配**: 200+ 紧急、100-199 重要、50-99 常规、0-49 兜底 +4. **响应类型**: 根据场景选择最合适的类型 +5. **测试验证**: 先低优先级测试,再调整为正式优先级 +6. **监控优化**: 定期检查命中率,优化关键词 + +--- + +**文档版本**: v1.0 +**生成时间**: 2026-02-27 +**维护状态**: ✅ 活跃维护 +**相关文档**: +- [AI 中台对接文档](../AI中台对接文档.md) +- [Prompt 模板管理分析](./prompt-template-analysis.md) diff --git a/docs/intent-rule-usage.md b/docs/intent-rule-usage.md new file mode 100644 index 0000000..b200db5 --- /dev/null +++ b/docs/intent-rule-usage.md @@ -0,0 +1,322 @@ +# 意图规则使用指南 + +## 1. 概述 + +意图规则(Intent Rule)是 AI 中台的智能路由系统,用于识别用户意图并自动路由到最合适的处理方式。它在 12 步生成流程的第 3 步执行,优先级高于默认的 RAG 检索。 + +### 1.1 核心特性 + +- ✅ **关键词匹配**:支持多个关键词的模糊匹配(不区分大小写) +- ✅ **正则表达式匹配**:支持复杂的模式匹配 +- ✅ **优先级排序**:按优先级从高到低匹配,命中第一个即停止 +- ✅ **四种响应类型**:固定回复、RAG 检索、话术流程、转人工 +- ✅ **租户隔离**:不同租户的规则完全独立 +- ✅ **缓存优化**:60 秒 TTL 内存缓存,减少数据库查询 +- ✅ **命中统计**:自动记录规则命中次数 + +### 1.2 在生成流程中的位置 + +``` +用户消息 → Step 1: 输入扫描 → Step 2: 流程检查 → Step 3: 意图匹配 → Step 4-12: 后续处理 +``` + +**意图匹配的作用**: +- 如果匹配成功 → 根据 response_type 路由到对应处理方式 +- 如果匹配失败 → 继续执行默认的 RAG 检索流程 + +--- + +## 2. 
数据模型 + +### 2.1 IntentRule 实体 + +| 字段 | 类型 | 必填 | 说明 | +|-----|------|------|------| +| `id` | UUID | 自动生成 | 规则唯一标识 | +| `tenant_id` | string | ✅ | 租户 ID(格式:name@ash@year) | +| `name` | string | ✅ | 规则名称(如"退货意图") | +| `keywords` | string[] | ❌ | 关键词列表(如 ["退货", "退款"]) | +| `patterns` | string[] | ❌ | 正则表达式列表(如 ["退.*货", "如何退货"]) | +| `priority` | int | ❌ | 优先级(默认 0,数值越大优先级越高) | +| `response_type` | string | ✅ | 响应类型:fixed/rag/flow/transfer | +| `target_kb_ids` | string[] | ❌ | 目标知识库 ID 列表(rag 类型必填) | +| `flow_id` | UUID | ❌ | 话术流程 ID(flow 类型必填) | +| `fixed_reply` | string | ❌ | 固定回复内容(fixed 类型必填) | +| `transfer_message` | string | ❌ | 转人工提示语(transfer 类型必填) | +| `is_enabled` | bool | ✅ | 是否启用(默认 true) | +| `hit_count` | int | 自动 | 命中次数统计 | +| `created_at` | datetime | 自动 | 创建时间 | +| `updated_at` | datetime | 自动 | 更新时间 | + +### 2.2 响应类型详解 + +#### 2.2.1 fixed - 固定回复 + +**用途**:对于明确的问题,直接返回预设的固定回复,跳过 LLM 生成。 + +**适用场景**: +- 常见问候语("你好"、"在吗") +- 简单查询("营业时间"、"联系方式") +- 标准流程说明("如何下单"、"支付方式") + +**必填字段**:`fixed_reply` + +**示例**: +```json +{ + "name": "营业时间查询", + "keywords": ["营业时间", "几点开门", "几点关门"], + "response_type": "fixed", + "fixed_reply": "我们的营业时间是周一至周日 9:00-21:00,节假日正常营业。" +} +``` + +#### 2.2.2 rag - 定向知识库检索 + +**用途**:将用户问题路由到特定的知识库进行检索,而不是搜索所有知识库。 + +**适用场景**: +- 产品咨询 → 路由到产品知识库 +- 售后问题 → 路由到售后知识库 +- 政策查询 → 路由到政策知识库 + +**必填字段**:`target_kb_ids`(知识库 ID 列表) + +**示例**: +```json +{ + "name": "产品咨询意图", + "keywords": ["产品", "功能", "参数", "配置"], + "patterns": [".*产品.*", "有什么功能"], + "response_type": "rag", + "target_kb_ids": ["kb_product_001", "kb_product_002"] +} +``` + +#### 2.2.3 flow - 启动话术流程 + +**用途**:触发预定义的多轮对话流程(话术脚本)。 + +**适用场景**: +- 订单处理流程(收集地址、确认信息) +- 问题诊断流程(逐步排查问题) +- 信息采集流程(收集用户需求) + +**必填字段**:`flow_id`(话术流程 ID) + +**示例**: +```json +{ + "name": "退货流程意图", + "keywords": ["退货", "退款", "不想要了"], + "response_type": "flow", + "flow_id": "flow_return_process_001" +} +``` + +#### 2.2.4 transfer - 转人工 + +**用途**:直接转接到人工客服。 + +**适用场景**: +- 投诉建议 +- 复杂问题 +- 明确要求人工服务 + 
+**必填字段**:`transfer_message`(转人工提示语) + +**示例**: +```json +{ + "name": "转人工意图", + "keywords": ["人工", "客服", "投诉"], + "patterns": ["转.*人工", "找.*客服"], + "response_type": "transfer", + "transfer_message": "正在为您转接人工客服,请稍候..." +} +``` + +--- + +## 3. 匹配算法 + +### 3.1 匹配流程 + +``` +1. 加载启用的规则(is_enabled=true),按 priority DESC 排序 +2. 遍历规则列表(从高优先级到低优先级) +3. 对每个规则: + a. 先尝试关键词匹配(keywords) + b. 如果关键词未匹配,尝试正则表达式匹配(patterns) +4. 命中第一个规则后立即返回,不再继续匹配 +5. 如果所有规则都未匹配,返回 None(继续默认 RAG 流程) +``` + +### 3.2 关键词匹配规则 + +- **不区分大小写**:用户输入和关键词都转为小写后匹配 +- **子串匹配**:只要用户消息包含关键词即可(如 "我想退货" 包含 "退货") +- **任意匹配**:keywords 列表中任意一个关键词匹配即成功 + +**示例**: +```python +keywords = ["退货", "退款", "不想要"] +user_message = "我想退货" # ✅ 匹配成功(包含"退货") +user_message = "能退款吗" # ✅ 匹配成功(包含"退款") +user_message = "发货了吗" # ❌ 匹配失败 +``` + +### 3.3 正则表达式匹配规则 + +- **不区分大小写**:使用 `re.IGNORECASE` 标志 +- **全文匹配**:使用 `re.search()`,匹配消息中的任意位置 +- **任意匹配**:patterns 列表中任意一个模式匹配即成功 +- **错误处理**:如果正则表达式语法错误,记录警告并跳过该模式 + +**示例**: +```python +patterns = ["退.*货", "如何.*退货", "^退货.*"] +user_message = "我想退货" # ✅ 匹配 "退.*货" +user_message = "如何申请退货" # ✅ 匹配 "如何.*退货" +user_message = "退货流程" # ✅ 匹配 "^退货.*" +``` + +### 3.4 优先级机制 + +**规则排序**: +```sql +ORDER BY priority DESC, created_at DESC +``` + +**优先级策略**: +- 高优先级规则优先匹配 +- 相同优先级按创建时间倒序(新规则优先) +- 建议为重要规则设置更高的优先级(如 100、200) + +**示例场景**: +``` +规则 A: priority=100, keywords=["退货"] → 精确退货流程 +规则 B: priority=50, keywords=["退", "货"] → 通用退货咨询 +规则 C: priority=0, keywords=["产品"] → 产品咨询 + +用户输入 "我想退货": +1. 先匹配规则 A(priority=100)→ 命中,返回 +2. 不再匹配规则 B 和 C +``` + +--- + +## 4. 
API 使用 + +### 4.1 认证与租户隔离 + +所有接口必须携带以下 HTTP Headers: + +```http +X-API-Key: <your_api_key> +X-Tenant-Id: <your_tenant_id> +``` + +### 4.2 创建意图规则 + +**接口**:`POST /admin/intent-rules` + +**请求示例 1:固定回复** +```bash +curl -X POST http://ai-service:8080/admin/intent-rules \ + -H "Content-Type: application/json" \ + -H "X-API-Key: your_api_key" \ + -H "X-Tenant-Id: szmp@ash@2026" \ + -d '{ + "name": "营业时间查询", + "keywords": ["营业时间", "几点开门", "几点关门", "什么时候营业"], + "priority": 50, + "response_type": "fixed", + "fixed_reply": "我们的营业时间是周一至周日 9:00-21:00,节假日正常营业。如有特殊情况会提前通知。" + }' +``` + +**请求示例 2:定向知识库检索** +```bash +curl -X POST http://ai-service:8080/admin/intent-rules \ + -H "Content-Type: application/json" \ + -H "X-API-Key: your_api_key" \ + -H "X-Tenant-Id: szmp@ash@2026" \ + -d '{ + "name": "产品咨询意图", + "keywords": ["产品", "功能", "参数", "配置", "型号"], + "patterns": [".*产品.*", "有什么功能", "支持.*吗"], + "priority": 80, + "response_type": "rag", + "target_kb_ids": ["kb_product_001", "kb_product_faq_002"] + }' +``` + +**请求示例 3:启动话术流程** +```bash +curl -X POST http://ai-service:8080/admin/intent-rules \ + -H "Content-Type: application/json" \ + -H "X-API-Key: your_api_key" \ + -H "X-Tenant-Id: szmp@ash@2026" \ + -d '{ + "name": "退货流程意图", + "keywords": ["退货", "退款", "不想要了", "申请退货"], + "patterns": ["退.*货", "如何.*退", "想.*退"], + "priority": 100, + "response_type": "flow", + "flow_id": "550e8400-e29b-41d4-a716-446655440000" + }' +``` + +**请求示例 4:转人工** +```bash +curl -X POST http://ai-service:8080/admin/intent-rules \ + -H "Content-Type: application/json" \ + -H "X-API-Key: your_api_key" \ + -H "X-Tenant-Id: szmp@ash@2026" \ + -d '{ + "name": "转人工意图", + "keywords": ["人工", "客服", "投诉", "找人"], + "patterns": ["转.*人工", "找.*客服", "我要.*投诉"], + "priority": 200, + "response_type": "transfer", + "transfer_message": "正在为您转接人工客服,请稍候..." + }' +``` + +--- + +## 5. 
总结 + +意图规则(Intent Rule)是 AI 中台的智能路由系统,在 12 步生成流程的第 3 步执行。 + +### 5.1 核心流程 + +创建规则 → 设置关键词/正则 → 配置响应类型 → 启用规则 → 用户对话 → 意图匹配 → 路由处理 + +### 5.2 关键特性 + +- **智能路由**: 根据用户意图自动选择最佳处理方式 +- **优先级控制**: 灵活的优先级机制避免冲突 +- **四种响应**: 固定回复、RAG 检索、话术流程、转人工 +- **高性能**: 60 秒缓存 + 优化的匹配算法 +- **租户隔离**: 多租户数据完全独立 +- **命中统计**: 自动记录规则使用情况 + +### 5.3 最佳实践 + +1. **关键词设计**: 2-6 个字,使用完整词组 +2. **正则表达式**: 简单明了,避免过于复杂 +3. **优先级分配**: 200+ 紧急、100-199 重要、50-99 常规、0-49 兜底 +4. **响应类型**: 根据场景选择最合适的类型 +5. **测试验证**: 先低优先级测试,再调整为正式优先级 +6. **监控优化**: 定期检查命中率,优化关键词 + +--- + +**文档版本**: v1.0 +**生成时间**: 2026-02-27 +**维护状态**: ✅ 活跃维护 +**相关文档**: +- [AI 中台对接文档](../AI中台对接文档.md) +- [Prompt 模板管理分析](./prompt-template-analysis.md) diff --git a/docs/prompt-template-analysis.md b/docs/prompt-template-analysis.md new file mode 100644 index 0000000..0fd5cf7 --- /dev/null +++ b/docs/prompt-template-analysis.md @@ -0,0 +1,626 @@ +# Prompt 模板管理生效机制与占位符使用分析 + +## 1. 核心架构 + +Prompt 模板管理系统由以下核心组件构成: + +### 1.1 数据模型 + +**PromptTemplate(模板实体)** +- `id`: UUID,模板唯一标识 +- `tenant_id`: 租户 ID,实现多租户隔离 +- `name`: 模板名称 +- `scene`: 场景标识(如 "default"、"customer_service") +- `description`: 模板描述 +- `is_default`: 是否为默认模板 + +**PromptTemplateVersion(模板版本)** +- `template_id`: 关联的模板 ID +- `version`: 版本号(整数,自增) +- `status`: 版本状态(draft/published/archived) +- `system_instruction`: 系统指令内容(包含占位符) +- `variables`: 自定义变量定义列表 + +### 1.2 核心服务 + +**PromptTemplateService** - 模板管理服务 +- 位置:`ai-service/app/services/prompt/template_service.py` +- 功能:模板 CRUD、版本管理、发布/回滚、缓存 + +**VariableResolver** - 变量解析器 +- 位置:`ai-service/app/services/prompt/variable_resolver.py` +- 功能:占位符替换、变量验证 + +**OrchestratorService** - 编排服务 +- 位置:`ai-service/app/services/orchestrator.py` +- 功能:在对话生成流程中加载和应用模板 + +--- + +## 2. 生效机制详解 + +### 2.1 模板加载流程(12 步 Pipeline 中的第 7 步) + +``` +用户请求 → Orchestrator._build_system_prompt() → 加载模板 → 解析变量 → 注入行为规则 → 传递给 LLM +``` + +**详细步骤**: + +1. **触发时机**:每次对话请求到达时,在 Step 7(PromptBuilder)执行 +2. 
**加载逻辑**: + ```python + # orchestrator.py:632-638 + template_service = PromptTemplateService(session) + template_version = await template_service.get_published_template( + tenant_id=ctx.tenant_id, + scene="default", # 场景可配置 + ) + ``` + +3. **缓存机制**: + - 首次加载:从数据库查询 `status=published` 的版本 + - 后续请求:从内存缓存读取(TTL 300 秒) + - 缓存失效:发布/回滚操作会自动清除缓存 + +4. **降级策略**: + - 如果没有已发布的模板 → 使用硬编码的 `SYSTEM_PROMPT` + - 如果数据库查询失败 → 使用硬编码的 `SYSTEM_PROMPT` + +### 2.2 版本管理机制 + +**版本状态流转**: +``` +draft(草稿)→ published(已发布)→ archived(已归档) +``` + +**发布流程**: +```python +# template_service.py:248-287 +async def publish_version(tenant_id, template_id, version): + 1. 查询模板是否存在(租户隔离) + 2. 将当前 published 版本改为 archived + 3. 将目标版本改为 published + 4. 清除缓存并预热新版本 + 5. 记录日志 +``` + +**回滚流程**: +```python +# template_service.py:289-298 +async def rollback_version(tenant_id, template_id, version): + # 实际上就是调用 publish_version + # 将历史版本重新标记为 published +``` + +**热更新保证**: +- 发布/回滚后立即清除缓存:`self._cache.invalidate(tenant_id, scene)` +- 下次请求会从数据库加载最新版本 +- 无需重启服务 + +### 2.3 租户隔离机制 + +所有操作都强制进行租户隔离: + +```python +# 查询时必须带 tenant_id +stmt = select(PromptTemplate).where( + PromptTemplate.tenant_id == tenant_id, + PromptTemplate.scene == scene, +) +``` + +不同租户的模板完全独立,互不影响。 + +--- + +## 3. 
占位符使用详解 + +### 3.1 占位符语法 + +**格式**:`{{variable_name}}` + +**示例**: +``` +你是 {{persona_name}},当前时间是 {{current_time}}。 +你正在为 {{tenant_name}} 提供 {{channel_type}} 渠道的客服服务。 +``` + +### 3.2 内置变量 + +**VariableResolver** 提供以下内置变量: + +| 变量名 | 类型 | 默认值 | 说明 | +|--------|------|--------|------| +| `persona_name` | string | "小N" | AI 人设名称 | +| `current_time` | function | 动态生成 | 当前时间(格式:YYYY-MM-DD HH:MM) | +| `channel_type` | string | "default" | 渠道类型(wechat/douyin/jd) | +| `tenant_name` | string | "平台" | 租户名称 | +| `session_id` | string | "" | 会话 ID | + +**动态变量示例**: +```python +# variable_resolver.py:15-21 +BUILTIN_VARIABLES = { + "persona_name": "小N", + "current_time": lambda: datetime.now().strftime("%Y-%m-%d %H:%M"), + "channel_type": "default", + "tenant_name": "平台", + "session_id": "", +} +``` + +### 3.3 自定义变量 + +**定义方式**:在模板的 `variables` 字段中定义 + +```json +{ + "variables": [ + { + "name": "company_name", + "default": "XX科技有限公司", + "description": "公司名称" + }, + { + "name": "service_hours", + "default": "9:00-18:00", + "description": "服务时间" + } + ] +} +``` + +**使用示例**: +``` +欢迎咨询 {{company_name}},我们的服务时间是 {{service_hours}}。 +``` + +### 3.4 变量解析流程 + +```python +# variable_resolver.py:45-75 +def resolve(template, variables, extra_context): + 1. 构建上下文:内置变量 + 自定义变量 + 额外上下文 + 2. 正则匹配:找到所有 {{variable}} 占位符 + 3. 替换逻辑: + - 如果变量存在 → 替换为值(函数则调用) + - 如果变量不存在 → 保留原占位符 + 记录警告 + 4. 返回解析后的字符串 +``` + +**正则表达式**: +```python +VARIABLE_PATTERN = re.compile(r"\{\{(\w+)\}\}") +``` + +### 3.5 变量优先级 + +变量解析的优先级(从高到低): + +1. **extra_context**(运行时传入的额外上下文) +2. **自定义变量**(模板定义的 variables) +3. **实例化时的上下文**(VariableResolver 构造函数传入) +4. **内置变量**(BUILTIN_VARIABLES) + +```python +# variable_resolver.py:77-101 +def _build_context(variables, extra_context): + context = {} + + # 1. 加载内置变量 + for key, value in BUILTIN_VARIABLES.items(): + if key in self._context: + context[key] = self._context[key] # 实例化时的上下文 + else: + context[key] = value # 内置默认值 + + # 2. 
加载自定义变量 + if variables: + for var in variables: + context[var["name"]] = var.get("default", "") + + # 3. 加载额外上下文(优先级最高) + if extra_context: + context.update(extra_context) + + return context +``` + +--- + +## 4. 实际使用示例 + +### 4.1 创建模板(通过 API) + +```bash +POST /admin/prompt-templates +X-Tenant-Id: szmp@ash@2026 +X-API-Key: your_api_key + +{ + "name": "客服模板 v1", + "scene": "default", + "description": "标准客服对话模板", + "system_instruction": "你是 {{persona_name}},一位专业的客服助手。\n当前时间:{{current_time}}\n渠道:{{channel_type}}\n\n你需要遵循以下原则:\n- 礼貌、专业、耐心\n- 优先使用知识库内容回答\n- 无法回答时建议转人工\n\n公司信息:{{company_name}}\n服务时间:{{service_hours}}", + "variables": [ + { + "name": "company_name", + "default": "XX科技", + "description": "公司名称" + }, + { + "name": "service_hours", + "default": "9:00-21:00", + "description": "服务时间" + } + ], + "is_default": true +} +``` + +**响应**: +```json +{ + "id": "550e8400-e29b-41d4-a716-446655440000", + "name": "客服模板 v1", + "scene": "default", + "description": "标准客服对话模板", + "is_default": true, + "created_at": "2026-02-27T12:00:00Z", + "updated_at": "2026-02-27T12:00:00Z" +} +``` + +此时模板已创建,但版本状态为 `draft`,尚未生效。 + +### 4.2 发布模板 + +```bash +POST /admin/prompt-templates/{tpl_id}/publish +X-Tenant-Id: szmp@ash@2026 +X-API-Key: your_api_key + +{ + "version": 1 +} +``` + +**响应**: +```json +{ + "success": true, + "message": "Version 1 published successfully" +} +``` + +**生效时间**:立即生效(缓存已清除) + +### 4.3 模板生效后的实际效果 + +**用户请求**: +```json +POST /ai/chat +X-Tenant-Id: szmp@ash@2026 +X-API-Key: your_api_key + +{ + "sessionId": "kf_001_wx123_1708765432000", + "currentMessage": "你好", + "channelType": "wechat" +} +``` + +**Orchestrator 内部处理**: + +1. **加载模板**(Step 7): + ```python + # 从缓存或数据库加载已发布的模板 + template_version = await template_service.get_published_template( + tenant_id="szmp@ash@2026", + scene="default" + ) + # 返回:system_instruction + variables + ``` + +2. 
**解析变量**: + ```python + resolver = VariableResolver( + channel_type="wechat", + tenant_name="深圳某项目", + session_id="kf_001_wx123_1708765432000" + ) + + system_prompt = resolver.resolve( + template=template_version.system_instruction, + variables=template_version.variables, + extra_context={"persona_name": "AI助手"} + ) + ``` + +3. **解析结果**: + ``` + 你是 AI助手,一位专业的客服助手。 + 当前时间:2026-02-27 20:18 + 渠道:wechat + + 你需要遵循以下原则: + - 礼貌、专业、耐心 + - 优先使用知识库内容回答 + - 无法回答时建议转人工 + + 公司信息:XX科技 + 服务时间:9:00-21:00 + ``` + +4. **注入行为规则**(如果有): + ```python + # 从数据库加载行为规则 + rules = await behavior_service.get_enabled_rules(tenant_id) + + # 拼接到 system_prompt + behavior_text = "\n".join([f"- {rule}" for rule in rules]) + system_prompt += f"\n\n行为约束:\n{behavior_text}" + ``` + +5. **传递给 LLM**: + ```python + messages = [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": "你好"} + ] + + response = await llm_client.generate(messages) + ``` + +### 4.4 更新模板 + +```bash +PUT /admin/prompt-templates/{tpl_id} +X-Tenant-Id: szmp@ash@2026 +X-API-Key: your_api_key + +{ + "system_instruction": "你是 {{persona_name}},一位专业且友好的客服助手。\n...", + "variables": [ + { + "name": "company_name", + "default": "XX科技有限公司", # 修改了默认值 + "description": "公司全称" + } + ] +} +``` + +**效果**: +- 创建新版本(version=2,status=draft) +- 旧版本(version=1)仍然是 published 状态 +- **模板不会立即生效**,需要发布 version 2 + +### 4.5 回滚模板 + +```bash +POST /admin/prompt-templates/{tpl_id}/rollback +X-Tenant-Id: szmp@ash@2026 +X-API-Key: your_api_key + +{ + "version": 1 +} +``` + +**效果**: +- version 2 变为 archived +- version 1 重新变为 published +- 缓存清除,立即生效 + +--- + +## 5. 
高级特性 + +### 5.1 变量验证 + +```python +# variable_resolver.py:115-144 +def validate_variables(template, defined_variables): + """ + 验证模板中的所有变量是否已定义 + + 返回: + { + "valid": True/False, + "missing": ["未定义的变量列表"], + "used_variables": ["模板中使用的所有变量"] + } + """ +``` + +**使用场景**: +- 前端编辑模板时实时验证 +- 发布前检查是否有未定义的变量 + +### 5.2 变量提取 + +```python +# variable_resolver.py:103-113 +def extract_variables(template): + """ + 从模板中提取所有变量名 + + 返回:["persona_name", "current_time", "company_name"] + """ +``` + +**使用场景**: +- 前端显示模板使用的变量列表 +- 自动生成变量定义表单 + +### 5.3 缓存策略 + +**TemplateCache** 实现: +```python +# template_service.py:32-72 +class TemplateCache: + def __init__(self, ttl_seconds=300): + self._cache = {} # key: (tenant_id, scene) + self._ttl = 300 # 5 分钟 + + def get(self, tenant_id, scene): + # 检查是否过期 + if time.time() - cached_at < self._ttl: + return version + else: + del self._cache[key] # 自动清理过期缓存 + + def invalidate(self, tenant_id, scene=None): + # 发布/回滚时清除缓存 +``` + +**缓存失效时机**: +- 发布新版本 +- 回滚到旧版本 +- 更新模板(创建新版本时不清除,因为新版本是 draft) +- TTL 过期(5 分钟) + +--- + +## 6. 最佳实践 + +### 6.1 模板设计建议 + +1. **使用语义化的变量名**: + ``` + ✅ {{company_name}}、{{service_hours}} + ❌ {{var1}}、{{x}} + ``` + +2. **为所有自定义变量提供默认值**: + ```json + { + "name": "company_name", + "default": "XX公司", // 必须提供 + "description": "公司名称" + } + ``` + +3. **避免在模板中硬编码业务数据**: + ``` + ❌ 你是小明,为 XX 公司提供服务 + ✅ 你是 {{persona_name}},为 {{company_name}} 提供服务 + ``` + +4. **合理使用内置变量**: + - `current_time`:适用于时间敏感的场景 + - `channel_type`:适用于多渠道差异化话术 + - `session_id`:适用于调试和追踪 + +### 6.2 版本管理建议 + +1. **小步迭代**: + - 每次修改创建新版本 + - 在测试环境验证后再发布 + - 保留历史版本以便回滚 + +2. **版本命名规范**(在 description 中): + ``` + v1.0 - 初始版本 + v1.1 - 优化语气,增加公司信息变量 + v1.2 - 修复变量引用错误 + ``` + +3. **灰度发布**(未来扩展): + - 可以为不同租户发布不同版本 + - 可以按百分比逐步切换版本 + +### 6.3 性能优化建议 + +1. **利用缓存**: + - 模板内容很少变化,缓存命中率高 + - 5 分钟 TTL 平衡了实时性和性能 + +2. **避免频繁发布**: + - 发布操作会清除缓存 + - 建议批量修改后统一发布 + +3. 
**监控缓存命中率**: + ```python + logger.debug(f"Cache hit for template: tenant={tenant_id}, scene={scene}") + ``` + +--- + +## 7. 故障排查 + +### 7.1 模板未生效 + +**症状**:发布后仍使用旧模板或硬编码 SYSTEM_PROMPT + +**排查步骤**: +1. 检查版本状态:`GET /admin/prompt-templates/{tpl_id}` + - 确认目标版本的 status 是否为 `published` +2. 检查缓存:等待 5 分钟或重启服务 +3. 检查日志: + ``` + [AC-AISVC-51] Cache hit for template: tenant=xxx, scene=default + [AC-AISVC-51] Loaded published template from DB: tenant=xxx, scene=default + [AC-AISVC-51] No published template found, using fallback + ``` + +### 7.2 变量未替换 + +**症状**:生成的 system_prompt 中仍有 `{{variable}}` 占位符 + +**排查步骤**: +1. 检查变量定义:确认变量在 `variables` 中定义 +2. 检查变量名拼写:必须完全匹配(区分大小写) +3. 检查日志: + ``` + WARNING: Unknown variable in template: xxx + ``` + +### 7.3 租户隔离问题 + +**症状**:租户 A 看到了租户 B 的模板 + +**排查步骤**: +1. 检查请求头:确认 `X-Tenant-Id` 正确 +2. 检查数据库: + ```sql + SELECT * FROM prompt_templates WHERE tenant_id = 'xxx'; + ``` +3. 检查代码:所有查询必须带 `tenant_id` 过滤 + +--- + +## 8. 总结 + +### 8.1 核心流程 + +``` +创建模板 → 编辑内容 → 发布版本 → 缓存加载 → 变量解析 → 传递给 LLM + ↓ ↓ ↓ ↓ ↓ ↓ + draft draft published 内存缓存 占位符替换 生成回复 +``` + +### 8.2 关键特性 + +- ✅ **版本管理**:支持多版本、发布/回滚、历史追溯 +- ✅ **热更新**:发布后立即生效,无需重启 +- ✅ **租户隔离**:多租户数据完全隔离 +- ✅ **缓存优化**:5 分钟 TTL,减少数据库查询 +- ✅ **变量系统**:内置变量 + 自定义变量,支持动态值 +- ✅ **降级策略**:模板不可用时自动回退到硬编码 + +### 8.3 扩展方向 + +- 🔄 **场景路由**:根据 intent 或 channel 自动选择不同 scene +- 🔄 **A/B 测试**:同一场景支持多个模板并行测试 +- 🔄 **模板继承**:子模板继承父模板并覆盖部分内容 +- 🔄 **变量类型**:支持 string/number/boolean/array 等类型 +- 🔄 **条件渲染**:支持 `{{#if}}...{{/if}}` 等逻辑控制 + +--- + +**文档生成时间**:2026-02-27 20:18 +**相关代码版本**:v0.6.0 +**维护状态**:✅ 活跃维护 diff --git a/docs/script-flow-usage.md b/docs/script-flow-usage.md new file mode 100644 index 0000000..9e001ac --- /dev/null +++ b/docs/script-flow-usage.md @@ -0,0 +1,663 @@ +# 话术流程管理使用指南 + +## 1. 
概述 + +话术流程(Script Flow)是 AI 中台的多轮对话引导系统,用于按照预定义的步骤引导用户完成信息收集、问题诊断或业务流程。它通过状态机机制实现结构化的对话流程控制。 + +### 1.1 核心特性 + +- ✅ **多步骤编排**:支持创建包含多个步骤的对话流程 +- ✅ **拖拽排序**:可视化拖拽调整步骤顺序 +- ✅ **等待输入控制**:每个步骤可配置是否等待用户回复 +- ✅ **超时处理**:支持超时重复、跳过或转人工 +- ✅ **变量占位符**:话术内容支持动态变量替换 +- ✅ **流程预览**:可视化预览流程执行效果 +- ✅ **意图触发**:通过意图规则自动触发流程 +- ✅ **状态管理**:自动跟踪流程执行进度 + +### 1.2 典型应用场景 + +| 场景 | 说明 | 示例 | +|------|------|------| +| **订单处理** | 收集订单信息并确认 | 收集收货地址 → 确认商品 → 选择支付方式 → 确认下单 | +| **退货流程** | 引导用户完成退货申请 | 确认订单号 → 选择退货原因 → 上传凭证 → 提交申请 | +| **问题诊断** | 逐步排查用户问题 | 确认设备型号 → 检查网络连接 → 重启设备 → 测试结果 | +| **信息采集** | 收集用户需求或反馈 | 询问需求类型 → 收集详细描述 → 确认联系方式 → 提交工单 | +| **预约服务** | 引导用户完成预约 | 选择服务类型 → 选择时间 → 填写联系方式 → 确认预约 | + +--- + +## 2. 功能介绍 + +### 2.1 流程列表页面 + +**访问路径**:AI 中台管理后台 → 智能路由 → 话术流程管理 + +**页面功能**: +- 查看所有话术流程列表 +- 显示流程名称、描述、步骤数、关联规则数、状态、更新时间 +- 快速启用/禁用流程 +- 创建、编辑、预览、删除流程 + +**列表字段说明**: + +| 字段 | 说明 | +|------|------| +| 流程名称 | 流程的唯一标识名称 | +| 描述 | 流程的用途说明(可选) | +| 步骤数 | 流程包含的步骤总数 | +| 关联规则 | 触发该流程的意图规则数量 | +| 状态 | 启用/禁用开关 | +| 更新时间 | 最后修改时间 | + +### 2.2 流程编辑器 + +**核心组件**: +1. **基本信息区**:配置流程名称、描述、启用状态 +2. **步骤编辑区**:可视化编辑流程步骤 +3. **拖拽排序**:通过拖拽图标调整步骤顺序 +4. **步骤配置**:为每个步骤配置详细参数 + +### 2.3 流程预览器 + +**功能**: +- 时间轴可视化展示流程步骤 +- 模拟流程执行过程 +- 高亮显示当前步骤 +- 查看步骤配置详情 +- 支持上一步/下一步/重置操作 + +--- + +## 3. 使用步骤 + +### 3.1 创建话术流程 + +#### 步骤 1:进入创建页面 + +1. 点击页面右上角的 **"新建流程"** 按钮 +2. 弹出流程编辑对话框 + +#### 步骤 2:填写基本信息 + +``` +流程名称:退货流程引导 (必填,建议简洁明确) +描述:引导用户完成退货申请 (可选,说明流程用途) +启用状态:✓ 启用 (默认启用) +``` + +#### 步骤 3:添加流程步骤 + +点击 **"添加步骤"** 按钮,配置每个步骤: + +**步骤 1 配置示例**: +``` +话术内容:您好,我来帮您处理退货申请。请提供您的订单号。 +等待输入:✓ 开启 +超时时间:60 秒 +超时动作:重复当前步骤 +``` + +**步骤 2 配置示例**: +``` +话术内容:收到订单号 {{order_id}},请问您的退货原因是什么? +1. 商品质量问题 +2. 不符合预期 +3. 
其他原因 +等待输入:✓ 开启 +超时时间:120 秒 +超时动作:转人工 +``` + +**步骤 3 配置示例**: +``` +话术内容:好的,已记录您的退货原因。请上传商品照片或相关凭证。 +等待输入:✓ 开启 +超时时间:180 秒 +超时动作:跳过进入下一步 +``` + +**步骤 4 配置示例**: +``` +话术内容:感谢您的配合!退货申请已提交,工单号为 {{ticket_id}}。我们将在 1-3 个工作日内处理,请保持手机畅通。 +等待输入:✗ 关闭 +``` + +#### 步骤 4:调整步骤顺序 + +- 鼠标悬停在步骤左侧的 **拖拽图标** 上 +- 按住鼠标左键拖动步骤到目标位置 +- 释放鼠标完成排序 + +#### 步骤 5:保存流程 + +- 点击 **"创建"** 按钮保存流程 +- 系统自动分配流程 ID +- 返回流程列表页面 + +### 3.2 编辑话术流程 + +1. 在流程列表中找到目标流程 +2. 点击 **"编辑"** 按钮 +3. 修改流程信息或步骤配置 +4. 点击 **"保存"** 按钮 + +**注意事项**: +- 修改后立即生效,影响正在执行的流程 +- 建议在低峰期修改重要流程 +- 修改前可先禁用流程,测试无误后再启用 + +### 3.3 预览话术流程 + +1. 在流程列表中找到目标流程 +2. 点击 **"预览"** 按钮 +3. 在右侧抽屉中查看流程时间轴 +4. 使用 **上一步/下一步** 按钮模拟流程执行 +5. 查看每个步骤的配置详情 + +**预览功能**: +- 时间轴可视化展示所有步骤 +- 当前步骤高亮显示(蓝色边框) +- 已完成步骤显示为绿色 +- 未执行步骤显示为灰色 +- 显示步骤的等待输入、超时配置 + +### 3.4 删除话术流程 + +1. 在流程列表中找到目标流程 +2. 点击 **"删除"** 按钮 +3. 确认删除操作 + +**警告**: +- 删除操作不可恢复 +- 如果有意图规则关联该流程,删除后规则将失效 +- 建议先禁用流程观察一段时间,确认无影响后再删除 + +### 3.5 启用/禁用流程 + +**方式 1:列表页快速切换** +- 直接点击流程列表中的 **状态开关** +- 立即生效 + +**方式 2:编辑页面修改** +- 进入流程编辑页面 +- 修改 **启用状态** 开关 +- 保存后生效 + +**禁用效果**: +- 禁用后,意图规则无法触发该流程 +- 正在执行的流程实例不受影响,继续执行完成 +- 流程数据保留,可随时重新启用 + +--- + +## 4. 步骤配置详解 + +### 4.1 话术内容 + +**功能**:定义该步骤向用户展示的话术文本。 + +**支持变量占位符**: +``` +语法:{{variable_name}} + +示例: +- 您的订单号是 {{order_id}} +- 尊敬的 {{customer_name}},您好! 
+- 当前时间:{{current_time}} +- 渠道:{{channel_type}} +``` + +**变量来源**: +- 会话上下文变量 +- 用户输入提取的信息 +- 系统内置变量(时间、渠道等) + +**最佳实践**: +- 话术简洁明了,避免过长 +- 使用礼貌用语,保持专业 +- 明确告知用户需要做什么 +- 提供选项时使用编号列表 + +### 4.2 等待用户输入 + +**功能**:控制该步骤是否需要等待用户回复。 + +**开启(✓)**: +- 系统发送话术后,等待用户回复 +- 收到用户回复后,根据条件推进到下一步 +- 如果超时未回复,执行超时动作 + +**关闭(✗)**: +- 系统发送话术后,立即推进到下一步 +- 适用于纯信息告知的步骤(如确认信息、结束语) + +**使用场景**: +- ✓ 需要收集信息:订单号、退货原因、联系方式 +- ✓ 需要用户确认:是/否、选择选项 +- ✗ 纯信息展示:流程说明、结果通知、感谢语 + +### 4.3 超时时间 + +**功能**:设置等待用户回复的最长时间(单位:秒)。 + +**取值范围**:5 - 300 秒 + +**推荐配置**: +- **简单问题**(如是/否):30-60 秒 +- **需要查找信息**(如订单号):60-120 秒 +- **需要操作**(如上传图片):120-180 秒 +- **复杂问题**(如详细描述):180-300 秒 + +**注意事项**: +- 超时时间过短:用户体验差,容易误触发超时 +- 超时时间过长:流程卡住时间长,影响效率 +- 根据实际业务场景调整 + +### 4.4 超时动作 + +**功能**:定义超时后的处理策略。 + +#### 选项 1:重复当前步骤 + +**行为**:重新发送当前步骤的话术,再次等待用户回复。 + +**适用场景**: +- 用户可能暂时离开,需要提醒 +- 问题简单,用户可能忘记回复 +- 重要信息收集,不能跳过 + +**示例话术**: +``` +首次:请提供您的订单号。 +重复:您还在吗?请提供您的订单号,以便我帮您处理。 +``` + +#### 选项 2:跳过进入下一步 + +**行为**:跳过当前步骤,直接进入下一步。 + +**适用场景**: +- 可选信息收集(如备注、补充说明) +- 有默认值或兜底方案 +- 不影响流程继续执行 + +**注意事项**: +- 确保下一步不依赖当前步骤的输入 +- 在后续步骤中处理缺失信息的情况 + +#### 选项 3:转人工 + +**行为**:结束流程,将会话转接到人工客服。 + +**适用场景**: +- 关键信息收集失败 +- 用户长时间无响应 +- 流程无法继续执行 + +**效果**: +- 设置 `shouldTransfer=true` +- 返回转人工提示语 +- 流程实例标记为"已完成" + +--- + +## 5. 与意图规则关联 + +### 5.1 创建触发规则 + +话术流程需要通过 **意图规则** 触发。 + +**步骤**: +1. 进入 **意图规则管理** 页面 +2. 点击 **"新建规则"** 按钮 +3. 配置规则信息: + +``` +规则名称:退货意图 +关键词:退货、退款、不想要了、申请退货 +正则表达式:退.*货、如何.*退、想.*退 +优先级:100 +响应类型:启动话术流程 +关联流程:选择 "退货流程引导" +启用状态:✓ 启用 +``` + +4. 保存规则 + +### 5.2 触发流程 + +**触发条件**: +- 用户消息匹配意图规则的关键词或正则表达式 +- 规则的响应类型为 `flow` +- 规则和流程均为启用状态 + +**触发流程**: +``` +用户输入:我想退货 + ↓ +意图识别:命中 "退货意图" 规则 + ↓ +启动流程:创建 "退货流程引导" 实例 + ↓ +执行步骤 1:发送第一步话术 + ↓ +等待用户输入:收集订单号 + ↓ +执行步骤 2:发送第二步话术 + ↓ +... + ↓ +流程完成:恢复正常对话 +``` + +### 5.3 流程执行优先级 + +**优先级规则**: +1. **进行中的流程** > 意图识别 > RAG 检索 +2. 如果会话存在进行中的流程实例,优先处理流程逻辑 +3. 流程完成后,恢复正常的意图识别和 RAG 流程 + +**示例**: +``` +会话状态:退货流程进行中(步骤 2) +用户输入:营业时间是什么? +系统行为:忽略 "营业时间" 意图,继续处理流程步骤 2 +``` + +--- + +## 6. 
最佳实践 + +### 6.1 流程设计原则 + +#### 1. 步骤数量适中 +- **推荐**:3-7 个步骤 +- **避免**:步骤过多(>10 步)导致用户疲劳 +- **避免**:步骤过少(<3 步)无法体现流程价值 + +#### 2. 话术简洁明确 +- 每个步骤的话术控制在 50-100 字 +- 明确告知用户需要做什么 +- 提供示例或选项帮助用户理解 + +#### 3. 合理设置超时 +- 根据任务复杂度设置超时时间 +- 重要步骤使用 "重复" 或 "转人工" +- 可选步骤使用 "跳过" + +#### 4. 提供退出机制 +- 在话术中告知用户如何退出流程 +- 示例:"如需退出流程,请回复 '退出' 或 '转人工'" + +### 6.2 话术编写技巧 + +#### 1. 使用礼貌用语 +``` +✓ 好的:您好,我来帮您处理退货申请。 +✗ 不好:提供订单号。 +``` + +#### 2. 明确指令 +``` +✓ 好的:请提供您的订单号(格式:20240227001)。 +✗ 不好:请提供信息。 +``` + +#### 3. 提供选项 +``` +✓ 好的:请选择退货原因: + 1. 商品质量问题 + 2. 不符合预期 + 3. 其他原因 +✗ 不好:为什么要退货? +``` + +#### 4. 确认信息 +``` +✓ 好的:您的订单号是 {{order_id}},退货原因是 {{reason}},确认无误吗?(是/否) +✗ 不好:信息已记录。 +``` + +### 6.3 测试与优化 + +#### 1. 测试流程 +- 创建流程后,先设置为 "禁用" 状态 +- 使用测试账号模拟完整流程 +- 测试各种用户输入场景(正常、异常、超时) +- 确认无误后再启用 + +#### 2. 监控数据 +- 定期查看流程执行统计 +- 关注步骤完成率、超时率、转人工率 +- 识别用户卡住的步骤 + +#### 3. 持续优化 +- 根据数据调整话术和超时配置 +- 优化步骤顺序和数量 +- 收集用户反馈改进流程 + +### 6.4 常见问题处理 + +#### 问题 1:用户中途退出流程 +**解决方案**: +- 在话术中明确告知退出方式 +- 设置关键词触发退出(如 "退出"、"转人工") +- 超时后自动转人工 + +#### 问题 2:用户输入不符合预期 +**解决方案**: +- 在话术中提供示例和格式说明 +- 使用选项列表限制用户输入 +- 设置输入验证和错误提示 + +#### 问题 3:流程执行时间过长 +**解决方案**: +- 减少步骤数量,合并相似步骤 +- 缩短超时时间 +- 将复杂流程拆分为多个子流程 + +#### 问题 4:流程与意图规则冲突 +**解决方案**: +- 提高流程触发规则的优先级 +- 在流程执行期间,系统自动优先处理流程逻辑 +- 避免创建过于宽泛的意图规则 + +--- + +## 7. 
API 参考 + +### 7.1 列表查询 + +**接口**:`GET /admin/script-flows` + +**请求参数**: +```json +{ + "is_enabled": true // 可选,筛选启用/禁用的流程 +} +``` + +**响应示例**: +```json +{ + "data": [ + { + "id": "550e8400-e29b-41d4-a716-446655440000", + "name": "退货流程引导", + "description": "引导用户完成退货申请", + "step_count": 4, + "is_enabled": true, + "linked_rule_count": 2, + "created_at": "2026-02-27T10:00:00", + "updated_at": "2026-02-27T15:30:00" + } + ] +} +``` + +### 7.2 创建流程 + +**接口**:`POST /admin/script-flows` + +**请求示例**: +```json +{ + "name": "退货流程引导", + "description": "引导用户完成退货申请", + "is_enabled": true, + "steps": [ + { + "step_id": "step_001", + "order": 1, + "content": "您好,我来帮您处理退货申请。请提供您的订单号。", + "wait_for_input": true, + "timeout_seconds": 60, + "timeout_action": "repeat", + "next_conditions": [] + }, + { + "step_id": "step_002", + "order": 2, + "content": "收到订单号 {{order_id}},请问您的退货原因是什么?", + "wait_for_input": true, + "timeout_seconds": 120, + "timeout_action": "transfer", + "next_conditions": [] + } + ] +} +``` + +**响应示例**: +```json +{ + "id": "550e8400-e29b-41d4-a716-446655440000", + "name": "退货流程引导", + "description": "引导用户完成退货申请", + "step_count": 2, + "is_enabled": true, + "created_at": "2026-02-27T16:00:00", + "updated_at": "2026-02-27T16:00:00" +} +``` + +### 7.3 查询详情 + +**接口**:`GET /admin/script-flows/{flow_id}` + +**响应示例**: +```json +{ + "id": "550e8400-e29b-41d4-a716-446655440000", + "name": "退货流程引导", + "description": "引导用户完成退货申请", + "is_enabled": true, + "steps": [ + { + "step_id": "step_001", + "order": 1, + "content": "您好,我来帮您处理退货申请。请提供您的订单号。", + "wait_for_input": true, + "timeout_seconds": 60, + "timeout_action": "repeat", + "next_conditions": [] + } + ], + "created_at": "2026-02-27T16:00:00", + "updated_at": "2026-02-27T16:00:00" +} +``` + +### 7.4 更新流程 + +**接口**:`PUT /admin/script-flows/{flow_id}` + +**请求示例**: +```json +{ + "name": "退货流程引导(优化版)", + "is_enabled": true, + "steps": [ + // 更新后的步骤列表 + ] +} +``` + +### 7.5 删除流程 + +**接口**:`DELETE /admin/script-flows/{flow_id}` + 
+**响应**:204 No Content + +--- + +## 8. 数据模型 + +### 8.1 ScriptFlow(流程实体) + +| 字段 | 类型 | 必填 | 说明 | +|------|------|------|------| +| `id` | UUID | 自动生成 | 流程唯一标识 | +| `tenant_id` | string | ✅ | 租户 ID | +| `name` | string | ✅ | 流程名称 | +| `description` | string | ❌ | 流程描述 | +| `steps` | FlowStep[] | ✅ | 步骤列表 | +| `is_enabled` | boolean | ✅ | 是否启用(默认 true) | +| `created_at` | datetime | 自动 | 创建时间 | +| `updated_at` | datetime | 自动 | 更新时间 | + +### 8.2 FlowStep(步骤实体) + +| 字段 | 类型 | 必填 | 说明 | +|------|------|------|------| +| `step_id` | string | ✅ | 步骤唯一标识 | +| `order` | int | ✅ | 步骤顺序(从 1 开始) | +| `content` | string | ✅ | 话术内容 | +| `wait_for_input` | boolean | ✅ | 是否等待用户输入 | +| `timeout_seconds` | int | ❌ | 超时时间(5-300 秒) | +| `timeout_action` | string | ❌ | 超时动作:repeat/skip/transfer | +| `next_conditions` | NextCondition[] | ❌ | 下一步条件(预留) | + +### 8.3 超时动作枚举 + +| 值 | 说明 | +|------|------| +| `repeat` | 重复当前步骤 | +| `skip` | 跳过进入下一步 | +| `transfer` | 转人工 | + +--- + +## 9. 总结 + +话术流程管理是 AI 中台的核心功能之一,通过可视化的流程编排,实现结构化的多轮对话引导。 + +### 9.1 核心价值 + +- **提升效率**:自动化处理标准流程,减少人工介入 +- **保证质量**:标准化话术,确保服务一致性 +- **优化体验**:引导式对话,降低用户操作难度 +- **数据收集**:结构化收集信息,便于后续处理 + +### 9.2 使用流程 + +创建流程 → 配置步骤 → 关联意图规则 → 测试验证 → 启用上线 → 监控优化 + +### 9.3 注意事项 + +1. 流程设计要简洁,避免步骤过多 +2. 话术要清晰明确,提供必要的示例 +3. 合理设置超时时间和超时动作 +4. 测试充分后再上线 +5. 
定期监控数据并优化流程 + +--- + +**文档版本**:v1.0 +**生成时间**:2026-02-27 +**维护状态**:✅ 活跃维护 +**相关文档**: +- [意图规则使用指南](./intent-rule-usage.md) +- [AI 中台对接文档](../AI中台对接文档.md) +- [Prompt 模板管理分析](./prompt-template-analysis.md) diff --git a/docs/session-handoff-protocol.md b/docs/session-handoff-protocol.md index 1815aad..911d50c 100644 --- a/docs/session-handoff-protocol.md +++ b/docs/session-handoff-protocol.md @@ -34,6 +34,8 @@ context: module: "{module_name}" # 对应 spec// 目录名 feature: "{feature_id}" # 对应 requirements.md 中的 feature_id status: [🔄进行中 | ⏳待开始 | ✅已完成] + version: "0.3.0" # 当前迭代版本号 + active_ac_range: "AC-MOD-21~30" # 当前活跃的 AC 编号范围 spec_references: # 必须引用模块 Spec 目录下的 SSOT 文档 @@ -43,6 +45,9 @@ spec_references: design: "spec/{module}/design.md" tasks: "spec/{module}/tasks.md" + # 版本化迭代信息(从 requirements.md frontmatter 读取) + active_version: "0.2.0-0.3.0" # 活跃版本范围(仅关注这些版本的 AC) + overall_progress: format: "- [ ] Phase X: 名称 (进度%) [关联 Tasks.md ID]" min_phases: 3 @@ -87,7 +92,11 @@ startup_guide: ### 1. 启动模式 - **继续模式**:检查 `docs/progress/` 下是否存在对应模块的进度文档。若存在,Read 文档 → 引用 Spec 目录 → 简短汇报状态 → 直接开始。 -- **新建模式**:若满足“触发条件(硬门禁阈值)”任一项(或涉及跨模块并行),必须先创建进度文档,再开始工作。 +- **新建模式**:若满足”触发条件(硬门禁阈值)”任一项(或涉及跨模块并行),必须先创建进度文档,再开始工作。 +- **版本化迭代模式**: + - 读取 `requirements.md` 的 frontmatter,识别 `active_version` 和 `version` + - 在进度文档中记录 `active_ac_range`(如 `AC-AISVC-91~110`) + - 仅关注活跃版本的 AC,历史版本(折叠在 `
<details>` 中)可跳过 ### 2. 下一步行动(The Gold Rule) - 禁止输出模糊的下一步(如“继续开发”)。 @@ -100,6 +109,13 @@ startup_guide: ### 4. 禁止事项 - 禁止编造或假设需求;信息不足必须询问用户,并在 Progress 中记录澄清结果。 - 禁止使用不存在的工具接口名;所有操作应基于当前环境可用工具。 +- 禁止引用历史版本(已折叠)的 AC 编号;所有代码/测试/提交必须引用活跃版本的 AC。 + +### 5. 版本化迭代支持 +- **读取版本信息**:启动时从 `requirements.md` frontmatter 读取 `active_version` 和 `version_history` +- **聚焦活跃版本**:仅关注活跃版本范围内的 AC(如 `0.6.0-0.7.0`),历史版本可跳过 +- **AC 编号连续性**:新迭代的 AC 编号延续上一版本(如 `AC-AISVC-90` → `AC-AISVC-91`) +- **进度文档同步**:在 `context.active_ac_range` 中记录当前迭代的 AC 范围,便于快速定位 --- diff --git a/docs/spec-product-zh.md b/docs/spec-product-zh.md index f5b99c7..3d55158 100644 --- a/docs/spec-product-zh.md +++ b/docs/spec-product-zh.md @@ -225,10 +225,153 @@ source: --- -## 4. 如何执行 +## 4. 版本化迭代规则(Version Iteration Protocol) + +### 4.1 问题背景 + +在多次迭代后,规范文档(尤其是 `requirements.md`)会因 AC 不断累积导致文档膨胀,进而引发: +- AI 上下文快速占满(单个文档可能 500+ 行) +- 历史需求干扰当前迭代的理解 +- 难以快速定位当前活跃的 AC 范围 + +### 4.2 解决方案:单文档内版本分区 + +采用 **单文档内版本分区 + 历史折叠** 策略: +- 保留最近 1-2 个版本为“活跃版本”(展开) +- 将更早的版本折叠为 `<details>
` 标签(AI 默认跳过) +- 在 frontmatter 中标记 `active_version` 和 `version_history` + +### 4.3 requirements.md 版本化模板 + +```markdown +--- +feature_id: "MOD" # 模块短 ID +title: "模块需求规范" +status: "in-progress" +version: "0.3.0" # 当前最新版本 +active_version: "0.2.0-0.3.0" # 活跃版本范围(展开显示) +version_history: # 版本历史索引 + - version: "0.3.0" + ac_range: "AC-MOD-21~30" + description: "功能增强 C" + - version: "0.2.0" + ac_range: "AC-MOD-11~20" + description: "功能扩展 B" + - version: "0.1.0" + ac_range: "AC-MOD-01~10" + description: "基础功能 A(已折叠)" +last_updated: "2026-02-27" +--- + +# 模块需求规范(MOD) + +## 1. 背景与目标 +[保持不变,描述整体背景] + +## 2. 模块边界(Scope) +[保持不变] + +## 3. 依赖盘点(Dependencies) +[保持不变] + +## 4. 用户故事(User Stories) +[仅保留活跃版本的 US,历史 US 折叠] + +## 5. 验收标准(Acceptance Criteria, EARS) + +### 📌 当前活跃版本(v0.2.0 - v0.3.0) + +#### 5.2 功能扩展 B(v0.2.0) +- [AC-MOD-11] WHEN 用户执行操作 X THEN 系统 SHALL 返回结果 Y +- [AC-MOD-12] WHEN 参数无效 THEN 系统 SHALL 返回 400 错误 +[展开显示 v0.2.0 的所有 AC] + +#### 5.3 功能增强 C(v0.3.0) +- [AC-MOD-21] WHEN 用户触发事件 Z THEN 系统 SHALL 执行流程 W +- [AC-MOD-22] WHEN 条件满足 THEN 系统 SHALL 更新状态 +[展开显示 v0.3.0 的所有 AC] + +--- + +### 📦 历史版本(已归档) + +
<details> +<summary>v0.1.0:基础功能 A(AC-MOD-01~10)</summary> + +#### 5.1 基础功能 A(v0.1.0) +- [AC-MOD-01] WHEN 用户提交请求 THEN 系统 SHALL 处理并返回 +- [AC-MOD-02] WHEN 请求格式错误 THEN 系统 SHALL 返回错误信息 +[折叠的历史 AC] + +</details>
+ +## 6. 追踪映射(Traceability) +[仅保留活跃版本的映射表] + +| AC ID | Endpoint | 方法 | operationId | 备注 | +|------|----------|------|-------------|------| +| AC-MOD-11 | /api/resource | GET | getResource | v0.2.0 | +| AC-MOD-21 | /api/resource | POST | createResource | v0.3.0 | +``` + +### 4.4 AI 执行规则(新增迭代需求时) + +当用户提出新迭代需求时,AI 必须: + +1. **读取 frontmatter**:识别 `version` 和 `active_version` +2. **确定新版本号**:按语义化版本递增(如 `0.6.0` → `0.7.0`) +3. **判断是否需要折叠**: + - 若活跃版本已有 2 个(如 `0.5.0-0.6.0`),则将最旧的版本(`0.5.0`)折叠 + - 若活跃版本仅 1 个,则保留并追加新版本 +4. **追加新需求**: + - 在“当前活跃版本”区域末尾追加新章节(如 `#### 5.7 新功能(v0.7.0)`) + - 新 AC 编号延续上一版本(如上一版本最大为 `AC-AISVC-90`,则新版本从 `AC-AISVC-91` 开始) +5. **更新 frontmatter**: + - `version: "0.7.0"` + - `active_version: "0.6.0-0.7.0"` + - 在 `version_history` 中追加新版本记录 +6. **更新追踪映射表**:仅保留活跃版本的映射 + +### 4.5 折叠策略 + +- **保留活跃版本数**:最多 2 个版本展开(当前版本 + 上一版本) +- **折叠粒度**:按大版本折叠(如 `v0.1.0-0.4.0` 合并为一个折叠块) +- **折叠标记**:使用 `
<details>` 标签,AI 读取时会自动跳过折叠内容 +- **AC 编号连续性**:折叠不影响 AC 编号的全局唯一性,新 AC 始终递增 + +### 4.6 示例:从 v0.2.0 迭代到 v0.3.0 + +**用户需求**:新增“功能增强 C” + +**AI 执行步骤**: +1. 读取 `requirements.md`,识别当前 `version: "0.2.0"`,`active_version: "0.1.0-0.2.0"` +2. 确定新版本号为 `0.3.0` +3. 将 `v0.1.0` 的内容移入 `<details>
` 折叠块 +4. 在”当前活跃版本”区域追加 `#### 5.3 功能增强 C(v0.3.0)` +5. 新 AC 从 `AC-MOD-21` 开始编号(上一版本最大为 `AC-MOD-20`) +6. 更新 frontmatter: + ```yaml + version: “0.3.0” + active_version: “0.2.0-0.3.0” + version_history: + - version: “0.3.0” + ac_range: “AC-MOD-21~30” + description: “功能增强 C” + - version: “0.2.0” + ac_range: “AC-MOD-11~20” + description: “功能扩展 B” + - version: “0.1.0” + ac_range: “AC-MOD-01~10” + description: “基础功能 A(已折叠)” + ``` + +--- + +## 5. 如何执行 1. **前置步骤**:完成模块拆分与依赖接口盘点(第 0 节)。 -2. **发起需求**:仅针对“一个模块”,生成初始 `requirements.md`。 +2. **发起需求**:仅针对”一个模块”,生成初始 `requirements.md`(v0.1.0)。 3. **定义契约**:输出 `openapi.provider.yaml` 与 `openapi.deps.yaml`,并进行接口走查。 4. **架构设计**:生成 `design.md`,明确模块内边界、数据流与依赖策略。 5. **任务执行**:生成并执行 `tasks.md`;调用方优先基于 deps 契约 Mock 并行推进。 +6. **迭代需求**:按”版本化迭代规则”(第 4 节)追加新版本需求,保持文档精简。 diff --git a/spec/ai-service-admin/requirements.md b/spec/ai-service-admin/requirements.md index 25df27a..d784882 100644 --- a/spec/ai-service-admin/requirements.md +++ b/spec/ai-service-admin/requirements.md @@ -1,13 +1,13 @@ --- feature_id: "ASA" title: "AI 中台管理界面(ai-service-admin)需求规范" -status: "draft" -version: "0.3.0" +status: "in-progress" +version: "0.6.0" owners: - "product" - "frontend" - "backend" -last_updated: "2026-02-24" +last_updated: "2026-02-27" source: type: "conversation" ref: "Scoping Result Confirmed" @@ -159,3 +159,252 @@ source: | AC-ASA-20 | /admin/rag/experiments/stream | POST | RAG 实验流式输出(SSE) | | AC-ASA-21 | /admin/rag/experiments/run | POST | Token 统计与耗时 | | AC-ASA-22 | /admin/rag/experiments/run | POST | 支持指定 LLM 配置 | + +--- + +## 9. 
迭代需求:智能客服增强管理界面(v0.6.0) + +> 说明:本节为 v0.6.0 迭代新增,为后端智能客服增强功能提供完整的管理界面,包括 Prompt 模板管理、多知识库管理、意图规则管理、话术流程管理、输出护栏管理。 + +### 9.1 Prompt 模板管理 + +- [AC-ASA-23] WHEN 用户访问 Prompt 模板管理页面 THEN 系统 SHALL 展示当前租户下所有模板列表(含模板名称、场景标签、当前发布版本号、更新时间),支持按场景筛选。 + +- [AC-ASA-24] WHEN 用户点击"新建模板"按钮 THEN 系统 SHALL 展示模板创建表单(名称、场景标签、系统指令编辑区),系统指令编辑区支持 `{{variable}}` 语法高亮提示和内置变量列表参考。 + +- [AC-ASA-25] WHEN 用户编辑模板并保存 THEN 系统 SHALL 调用后端创建新版本,展示版本号变更提示,并在模板详情中展示版本历史列表。 + +- [AC-ASA-26] WHEN 用户点击"发布"按钮 THEN 系统 SHALL 弹出确认对话框,确认后调用发布接口,展示发布成功提示,并更新列表中的发布版本号。 + +- [AC-ASA-27] WHEN 用户在版本历史中点击"回滚到此版本" THEN 系统 SHALL 弹出确认对话框,确认后调用回滚接口,展示回滚成功提示。 + +- [AC-ASA-28] WHEN 用户查看模板详情 THEN 系统 SHALL 展示当前发布版本的完整内容、变量定义列表、版本历史时间线(含版本号、状态、创建时间)。 + +### 9.2 多知识库管理 + +- [AC-ASA-29] WHEN 用户访问知识库管理页面 THEN 系统 SHALL 展示知识库列表(含名称、类型标签、文档数量、优先级、启用状态),替代原有的单一文档列表视图。 + +- [AC-ASA-30] WHEN 用户点击"新建知识库"按钮 THEN 系统 SHALL 展示创建表单(名称、类型选择 product/faq/script/policy/general、描述、优先级权重)。 + +- [AC-ASA-31] WHEN 用户点击知识库卡片/行 THEN 系统 SHALL 进入该知识库的文档管理视图,展示该知识库下的文档列表,上传文档时自动关联到当前知识库。 + +- [AC-ASA-32] WHEN 用户编辑知识库信息 THEN 系统 SHALL 支持修改名称、描述、类型、优先级、启用/禁用状态。 + +- [AC-ASA-33] WHEN 用户删除知识库 THEN 系统 SHALL 弹出二次确认对话框(提示将删除所有关联文档和索引数据),确认后调用删除接口。 + +### 9.3 意图规则管理 + +- [AC-ASA-34] WHEN 用户访问意图规则管理页面 THEN 系统 SHALL 展示规则列表(含意图名称、关键词摘要、响应类型标签、优先级、命中次数、启用状态),支持按响应类型筛选。 + +- [AC-ASA-35] WHEN 用户点击"新建规则"按钮 THEN 系统 SHALL 展示规则创建表单: + - 基础信息:意图名称、优先级 + - 匹配条件:关键词标签输入(支持多个)、正则表达式输入(支持多个,带语法校验提示) + - 响应配置:响应类型选择(fixed/rag/flow/transfer),根据类型动态展示: + - `fixed`:固定回复文本编辑区 + - `rag`:知识库多选下拉 + - `flow`:话术流程选择下拉 + - `transfer`:转人工话术编辑区 + +- [AC-ASA-36] WHEN 用户编辑或删除规则 THEN 系统 SHALL 支持修改所有字段,删除时弹出确认对话框。 + +### 9.4 话术流程管理 + +- [AC-ASA-37] WHEN 用户访问话术流程管理页面 THEN 系统 SHALL 展示流程列表(含流程名称、步骤数、启用状态、关联意图规则数)。 + +- [AC-ASA-38] WHEN 用户点击"新建流程"按钮 THEN 系统 SHALL 展示流程编辑器: + - 基础信息:流程名称、描述 + - 步骤编辑区:支持添加/删除/拖拽排序步骤,每个步骤包含: + - 话术内容编辑(支持 `{{variable}}` 占位符) + - 等待用户输入开关 + - 超时设置(秒数 + 超时动作选择:重复/跳过/转人工) + - 下一步条件配置(关键词匹配列表 + 跳转目标步骤) + +- [AC-ASA-39] WHEN 用户查看流程详情 THEN 系统 SHALL 
以步骤流程图或时间线形式展示完整流程,直观呈现步骤间的跳转关系。 + +### 9.5 输出护栏管理 + +- [AC-ASA-40] WHEN 用户访问输出护栏管理页面 THEN 系统 SHALL 展示两个标签页:「禁词管理」和「行为规则」。 + +- [AC-ASA-41] WHEN 用户在禁词管理标签页 THEN 系统 SHALL 展示禁词列表(含词语、类别标签、替换策略、命中次数、启用状态),支持按类别筛选,支持批量导入(CSV/文本,每行一个词)。 + +- [AC-ASA-42] WHEN 用户点击"添加禁词"按钮 THEN 系统 SHALL 展示添加表单:词语、类别选择(competitor/sensitive/political/custom)、替换策略选择(mask/replace/block),根据策略动态展示: + - `replace`:替换文本输入框 + - `block`:兜底话术输入框 + +- [AC-ASA-43] WHEN 用户在行为规则标签页 THEN 系统 SHALL 展示行为规则列表(含规则描述、类别、启用状态),支持添加/编辑/删除/启用/禁用。 + +- [AC-ASA-44] WHEN 用户添加行为规则 THEN 系统 SHALL 展示添加表单:规则描述文本区、类别选择(compliance/tone/boundary/custom)。 + +### 9.6 用户故事(v0.6.0 迭代追加) + +- [US-ASA-11] 作为 Prompt 工程师,我希望在界面上创建和管理不同场景的 Prompt 模板,支持版本管理和一键回滚,以便快速迭代 AI 人设和话术风格。 + +- [US-ASA-12] 作为运营人员,我希望创建多个知识库并按类型分类(产品/FAQ/话术/政策),以便让 AI 在不同场景下检索最相关的知识。 + +- [US-ASA-13] 作为运营人员,我希望配置意图识别规则,让特定问题走固定回复或话术流程,以便保证关键场景的回复一致性。 + +- [US-ASA-14] 作为运营人员,我希望编排多步骤的话术流程,引导用户按固定步骤完成信息收集,以便提升客服效率。 + +- [US-ASA-15] 作为合规人员,我希望配置禁词和行为规则,确保 AI 不会输出竞品名称、敏感信息或违规承诺,以便满足合规要求。 + +### 9.7 追踪映射(v0.6.0 迭代追加) + +| AC ID | Endpoint | 方法 | 备注 | +|------|----------|------|-----| +| AC-ASA-23 | /admin/prompt-templates | GET | Prompt 模板列表 | +| AC-ASA-24 | /admin/prompt-templates | POST | 创建模板 | +| AC-ASA-25 | /admin/prompt-templates/{tplId} | PUT | 更新模板 | +| AC-ASA-26 | /admin/prompt-templates/{tplId}/publish | POST | 发布模板 | +| AC-ASA-27 | /admin/prompt-templates/{tplId}/rollback | POST | 回滚模板 | +| AC-ASA-28 | /admin/prompt-templates/{tplId} | GET | 模板详情 | +| AC-ASA-29 | /admin/kb/knowledge-bases | GET | 知识库列表 | +| AC-ASA-30 | /admin/kb/knowledge-bases | POST | 创建知识库 | +| AC-ASA-31 | /admin/kb/documents | GET | 知识库文档列表 | +| AC-ASA-32 | /admin/kb/knowledge-bases/{kbId} | PUT | 更新知识库 | +| AC-ASA-33 | /admin/kb/knowledge-bases/{kbId} | DELETE | 删除知识库 | +| AC-ASA-34 | /admin/intent-rules | GET | 意图规则列表 | +| AC-ASA-35 | /admin/intent-rules | POST | 创建意图规则 | +| AC-ASA-36 | /admin/intent-rules/{ruleId} | PUT/DELETE | 编辑/删除规则 | +| AC-ASA-37 | /admin/script-flows | GET | 
话术流程列表 | +| AC-ASA-38 | /admin/script-flows | POST | 创建话术流程 | +| AC-ASA-39 | /admin/script-flows/{flowId} | GET | 流程详情 | +| AC-ASA-40 | - | - | 护栏管理页面布局 | +| AC-ASA-41 | /admin/guardrails/forbidden-words | GET | 禁词列表 | +| AC-ASA-42 | /admin/guardrails/forbidden-words | POST | 添加禁词 | +| AC-ASA-43 | /admin/guardrails/behavior-rules | GET | 行为规则列表 | +| AC-ASA-44 | /admin/guardrails/behavior-rules | POST | 添加行为规则 | + +--- + +## 10. 迭代需求:对话流程测试与监控(v0.7.0) + +> 说明:本节为 v0.7.0 迭代新增,为 v0.6.0 新增的四个对话流程控制功能(Prompt 模板、意图规则、话术流程、输出护栏)提供完整的测试和监控能力。 + +### 10.1 Dashboard 统计增强 + +- [AC-ASA-45] WHEN 用户访问 Dashboard 页面 THEN 系统 SHALL 在现有统计卡片基础上新增四个统计卡片: + - 意图规则命中率(命中次数/总对话次数) + - Prompt 模板使用次数(按模板分组统计) + - 话术流程激活次数(按流程分组统计) + - 护栏拦截次数(按类别分组统计) + +- [AC-ASA-46] WHEN 用户点击统计卡片 THEN 系统 SHALL 跳转到对应的详细监控页面(如点击"意图规则命中率"跳转到意图规则监控页面)。 + +- [AC-ASA-47] WHEN Dashboard 加载统计数据 THEN 系统 SHALL 支持按时间范围筛选(今日/本周/本月/自定义),并展示趋势图表。 + +### 10.2 对话流程测试台(RAG 实验室扩展) + +- [AC-ASA-48] WHEN 用户访问 RAG 实验室页面 THEN 系统 SHALL 在原有检索测试基础上新增"完整流程测试"模式切换开关。 + +- [AC-ASA-49] WHEN 用户切换到"完整流程测试"模式并输入测试消息 THEN 系统 SHALL 调用后端完整的 12 步生成流程,并以可视化时间线形式展示每一步的执行结果: + - Step 1: 输入扫描(敏感词检测结果) + - Step 2: 流程检查(是否存在进行中的话术流程) + - Step 3: 意图匹配(命中的规则名称、匹配的关键词/正则) + - Step 4: Prompt 模板(使用的模板名称、版本号、渲染后的系统指令) + - Step 5-7: RAG 检索(检索的知识库、Top-K 结果、相似度分数) + - Step 8: 话术流程(流程状态、当前步骤、下一步动作) + - Step 9: LLM 生成(使用的模型、Token 消耗、生成耗时) + - Step 10: 上下文管理(会话历史长度、记忆摘要) + - Step 11: 输出护栏(命中的禁词、替换策略、是否拦截) + - Step 12: 最终输出(完整回复内容、置信度、是否转人工) + +- [AC-ASA-50] WHEN 某一步执行失败或跳过 THEN 系统 SHALL 在时间线中标记该步骤状态(成功/失败/跳过),并展示失败原因或跳过原因。 + +- [AC-ASA-51] WHEN 用户点击时间线中的某一步 THEN 系统 SHALL 展开该步骤的详细数据(JSON 格式),支持复制和导出。 + +- [AC-ASA-52] WHEN 用户在测试台中修改配置(如切换 Prompt 模板、指定知识库、调整检索参数)THEN 系统 SHALL 支持对比测试,并行展示不同配置下的执行结果差异。 + +### 10.3 意图规则测试与监控 + +- [AC-ASA-53] WHEN 用户在意图规则管理页面点击"测试"按钮 THEN 系统 SHALL 弹出测试对话框,输入测试消息后展示: + - 是否命中该规则 + - 匹配的关键词或正则表达式 + - 规则优先级排序中的位置 + - 如果未命中,展示原因(关键词不匹配/正则不匹配/优先级被其他规则抢占) + +- [AC-ASA-54] WHEN 用户访问意图规则监控页面 THEN 系统 SHALL 
展示规则命中统计表格(含规则名称、命中次数、命中率、最近命中时间、平均响应时间),支持按时间范围筛选和导出。 + +- [AC-ASA-55] WHEN 用户点击规则命中统计表格中的某一行 THEN 系统 SHALL 展示该规则的详细命中记录列表(含用户消息、命中时间、匹配的关键词、响应类型、执行结果)。 + +### 10.4 Prompt 模板测试与监控 + +- [AC-ASA-56] WHEN 用户在 Prompt 模板管理页面点击"预览"按钮 THEN 系统 SHALL 弹出预览对话框,展示: + - 模板原始内容(含变量占位符) + - 变量列表及当前值(支持手动输入测试值) + - 渲染后的完整 Prompt(变量替换后的最终内容) + +- [AC-ASA-57] WHEN 用户在预览对话框中修改变量值 THEN 系统 SHALL 实时更新渲染后的 Prompt 内容。 + +- [AC-ASA-58] WHEN 用户访问 Prompt 模板监控页面 THEN 系统 SHALL 展示模板使用统计表格(含模板名称、使用次数、平均 Token 消耗、最近使用时间),支持按场景标签筛选。 + +### 10.5 话术流程测试与监控 + +- [AC-ASA-59] WHEN 用户在话术流程管理页面点击"模拟执行"按钮 THEN 系统 SHALL 弹出模拟对话框,按步骤展示话术内容,用户可输入模拟回复,系统根据下一步条件推进流程,直到流程结束。 + +- [AC-ASA-60] WHEN 用户访问话术流程监控页面 THEN 系统 SHALL 展示流程激活统计表格(含流程名称、激活次数、完成率、平均完成时长、中断率),支持按时间范围筛选。 + +- [AC-ASA-61] WHEN 用户点击流程激活统计表格中的某一行 THEN 系统 SHALL 展示该流程的详细执行记录列表(含会话 ID、激活时间、当前步骤、执行状态、中断原因)。 + +### 10.6 输出护栏测试与监控 + +- [AC-ASA-62] WHEN 用户在输出护栏管理页面点击"测试"按钮 THEN 系统 SHALL 弹出测试对话框,输入测试文本后展示: + - 命中的禁词列表(含词语、类别、位置) + - 应用的替换策略(mask/replace/block) + - 处理后的文本内容 + +- [AC-ASA-63] WHEN 用户访问输出护栏监控页面 THEN 系统 SHALL 展示护栏拦截统计表格(含禁词、拦截次数、类别、最近拦截时间),支持按类别筛选和导出。 + +- [AC-ASA-64] WHEN 用户点击护栏拦截统计表格中的某一行 THEN 系统 SHALL 展示该禁词的详细拦截记录列表(含原始文本、处理后文本、拦截时间、会话 ID)。 + +### 10.7 对话追踪页面 + +- [AC-ASA-65] WHEN 用户访问对话追踪页面 THEN 系统 SHALL 展示对话记录列表(含会话 ID、用户消息、AI 回复、触发的规则、使用的模板、执行耗时、创建时间),支持按时间范围、规则类型、模板、流程筛选。 + +- [AC-ASA-66] WHEN 用户点击对话记录列表中的某一行 THEN 系统 SHALL 展开该对话的完整执行链路(12 步流程详情),以折叠面板形式展示每一步的输入输出和执行状态。 + +- [AC-ASA-67] WHEN 用户在对话追踪页面点击"回放"按钮 THEN 系统 SHALL 以动画形式回放该对话的执行流程,逐步展示每一步的执行过程和数据流转。 + +- [AC-ASA-68] WHEN 用户在对话追踪页面点击"导出"按钮 THEN 系统 SHALL 支持导出对话记录为 JSON/CSV 格式,包含完整的执行链路数据。 + +### 10.8 用户故事(v0.7.0 迭代追加) + +- [US-ASA-16] 作为 AI 开发者,我希望在测试台中输入测试消息并查看完整的 12 步执行流程,以便快速定位问题和优化配置。 + +- [US-ASA-17] 作为运营人员,我希望查看意图规则的命中统计和详细记录,以便评估规则效果并优化关键词配置。 + +- [US-ASA-18] 作为 Prompt 工程师,我希望预览 Prompt 模板的渲染结果并查看使用统计,以便评估模板效果并迭代优化。 + +- [US-ASA-19] 作为运营人员,我希望模拟执行话术流程并查看激活统计,以便验证流程设计并优化步骤配置。 + +- [US-ASA-20] 作为合规人员,我希望测试输出护栏并查看拦截统计,以便确保禁词配置有效并满足合规要求。 + +- [US-ASA-21] 
作为系统管理员,我希望查看对话追踪记录并回放执行流程,以便排查问题和分析系统行为。 + +### 10.9 追踪映射(v0.7.0 迭代追加) + +| AC ID | Endpoint | 方法 | 备注 | +|------|----------|------|-----| +| AC-ASA-45 | /admin/dashboard/stats | GET | Dashboard 统计增强 | +| AC-ASA-46 | - | - | 前端路由跳转 | +| AC-ASA-47 | /admin/dashboard/stats | GET | 时间范围筛选参数 | +| AC-ASA-48 | - | - | 前端模式切换 | +| AC-ASA-49 | /admin/test/flow-execution | POST | 完整流程测试 | +| AC-ASA-50 | /admin/test/flow-execution | POST | 步骤状态标记 | +| AC-ASA-51 | - | - | 前端详情展开 | +| AC-ASA-52 | /admin/test/flow-execution | POST | 对比测试参数 | +| AC-ASA-53 | /admin/intent-rules/{ruleId}/test | POST | 意图规则测试 | +| AC-ASA-54 | /admin/monitoring/intent-rules | GET | 意图规则监控统计 | +| AC-ASA-55 | /admin/monitoring/intent-rules/{ruleId}/hits | GET | 规则命中记录 | +| AC-ASA-56 | /admin/prompt-templates/{tplId}/preview | POST | Prompt 模板预览 | +| AC-ASA-57 | - | - | 前端实时渲染 | +| AC-ASA-58 | /admin/monitoring/prompt-templates | GET | Prompt 模板监控统计 | +| AC-ASA-59 | /admin/script-flows/{flowId}/simulate | POST | 话术流程模拟执行 | +| AC-ASA-60 | /admin/monitoring/script-flows | GET | 话术流程监控统计 | +| AC-ASA-61 | /admin/monitoring/script-flows/{flowId}/executions | GET | 流程执行记录 | +| AC-ASA-62 | /admin/guardrails/test | POST | 输出护栏测试 | +| AC-ASA-63 | /admin/monitoring/guardrails | GET | 输出护栏监控统计 | +| AC-ASA-64 | /admin/monitoring/guardrails/{wordId}/blocks | GET | 禁词拦截记录 | +| AC-ASA-65 | /admin/monitoring/conversations | GET | 对话追踪列表 | +| AC-ASA-66 | /admin/monitoring/conversations/{conversationId} | GET | 对话执行链路详情 | +| AC-ASA-67 | - | - | 前端回放动画 | +| AC-ASA-68 | /admin/monitoring/conversations/export | POST | 对话记录导出 | diff --git a/spec/ai-service-admin/tasks.md b/spec/ai-service-admin/tasks.md index 80c9f72..085bc6a 100644 --- a/spec/ai-service-admin/tasks.md +++ b/spec/ai-service-admin/tasks.md @@ -2,7 +2,7 @@ module: ai-service-admin title: "AI 中台管理界面(ai-service-admin)任务清单" status: "in-progress" -version: "0.6.0" +version: "0.7.0" owners: - "frontend" - "backend" @@ -363,3 +363,133 @@ principles: - [x] (P12-07) 
路由注册与导航菜单:将新增的 5 个页面(Prompt 模板、知识库、意图规则、话术流程、输出护栏)注册到路由和侧边导航菜单 - AC: [AC-ASA-23~AC-ASA-44] + + +--- + +## Phase 13: 测试与监控系统(v0.7.0) + +> 页面导向:为 v0.6.0 新增的四大功能提供完整的测试和监控能力。 + +### 13.1 Dashboard 统计增强 + +- [ ] (P13-01) API 服务层:创建 src/api/monitoring.ts 和 src/types/monitoring.ts + - AC: [AC-ASA-45, AC-ASA-46, AC-ASA-47] + +- [ ] (P13-02) Dashboard 统计卡片组件:实现四个新增统计卡片(意图规则命中率、Prompt 模板使用次数、话术流程激活次数、护栏拦截次数) + - AC: [AC-ASA-45] + +- [ ] (P13-03) 统计卡片跳转:实现点击卡片跳转到对应监控页面的路由逻辑 + - AC: [AC-ASA-46] + +- [ ] (P13-04) 时间范围筛选:实现时间范围选择器(今日/本周/本月/自定义)并集成趋势图表 + - AC: [AC-ASA-47] + +### 13.2 完整流程测试台(RAG 实验室扩展) + +- [ ] (P13-05) 测试模式切换:在 RAG 实验室页面添加"完整流程测试"模式开关 + - AC: [AC-ASA-48] + +- [ ] (P13-06) 流程时间线组件:实现 FlowExecutionTimeline 组件,展示 12 步执行流程 + - AC: [AC-ASA-49, AC-ASA-50] + +- [ ] (P13-07) 步骤详情展开:实现步骤卡片点击展开详细数据(JSON 格式),支持复制和导出 + - AC: [AC-ASA-51] + +- [ ] (P13-08) 对比测试功能:实现并行展示不同配置下的执行结果差异 + - AC: [AC-ASA-52] + +### 13.3 意图规则测试与监控 + +- [ ] (P13-09) 规则测试对话框:在意图规则管理页面实现测试对话框,展示匹配结果和冲突检测 + - AC: [AC-ASA-53] + +- [ ] (P13-10) 意图规则监控页面:创建 /admin/monitoring/intent-rules 页面,展示规则命中统计表格 + - AC: [AC-ASA-54] + +- [ ] (P13-11) 规则命中记录详情:实现命中记录列表弹窗,展示详细命中记录 + - AC: [AC-ASA-55] + +### 13.4 Prompt 模板测试与监控 + +- [ ] (P13-12) 模板预览对话框:在 Prompt 模板管理页面实现预览对话框,支持变量测试和实时渲染 + - AC: [AC-ASA-56, AC-ASA-57] + +- [ ] (P13-13) Prompt 模板监控页面:创建 /admin/monitoring/prompt-templates 页面,展示模板使用统计 + - AC: [AC-ASA-58] + +### 13.5 话术流程测试与监控 + +- [ ] (P13-14) 流程模拟对话框:在话术流程管理页面实现模拟执行对话框,支持步骤推进 + - AC: [AC-ASA-59] + +- [ ] (P13-15) 话术流程监控页面:创建 /admin/monitoring/script-flows 页面,展示流程激活统计 + - AC: [AC-ASA-60] + +- [ ] (P13-16) 流程执行记录详情:实现执行记录列表弹窗,展示详细执行记录 + - AC: [AC-ASA-61] + +### 13.6 输出护栏测试与监控 + +- [ ] (P13-17) 护栏测试对话框:在输出护栏管理页面实现测试对话框,展示禁词检测结果 + - AC: [AC-ASA-62] + +- [ ] (P13-18) 输出护栏监控页面:创建 /admin/monitoring/guardrails 页面,展示护栏拦截统计 + - AC: [AC-ASA-63] + +- [ ] (P13-19) 护栏拦截记录详情:实现拦截记录列表弹窗,展示详细拦截记录 + - AC: [AC-ASA-64] + +### 13.7 对话追踪页面 + +- [ ] (P13-20) 对话追踪列表页:创建 /admin/monitoring/conversations 页面,展示对话记录列表 + - AC: 
[AC-ASA-65] + +- [ ] (P13-21) 对话执行链路详情:实现对话详情弹窗,展示完整 12 步执行链路 + - AC: [AC-ASA-66] + +- [ ] (P13-22) 对话回放功能:实现动画回放对话执行流程 + - AC: [AC-ASA-67] + +- [ ] (P13-23) 对话记录导出:实现导出功能,支持 JSON/CSV 格式 + - AC: [AC-ASA-68] + +### 13.8 路由与导航整合 + +- [ ] (P13-24) 监控页面路由注册:将新增的 6 个监控页面注册到路由系统 + - AC: [AC-ASA-45~AC-ASA-68] + +- [ ] (P13-25) 导航菜单更新:在侧边导航菜单中添加"监控中心"分组,包含所有监控页面入口 + - AC: [AC-ASA-45~AC-ASA-68] + +--- + +## Phase 13 任务进度追踪 + +| 任务 | 描述 | 状态 | +|------|------|------| +| P13-01 | API 服务层 | ⏳ 待处理 | +| P13-02 | Dashboard 统计卡片 | ⏳ 待处理 | +| P13-03 | 统计卡片跳转 | ⏳ 待处理 | +| P13-04 | 时间范围筛选 | ⏳ 待处理 | +| P13-05 | 测试模式切换 | ⏳ 待处理 | +| P13-06 | 流程时间线组件 | ⏳ 待处理 | +| P13-07 | 步骤详情展开 | ⏳ 待处理 | +| P13-08 | 对比测试功能 | ⏳ 待处理 | +| P13-09 | 规则测试对话框 | ⏳ 待处理 | +| P13-10 | 意图规则监控页面 | ⏳ 待处理 | +| P13-11 | 规则命中记录详情 | ⏳ 待处理 | +| P13-12 | 模板预览对话框 | ⏳ 待处理 | +| P13-13 | Prompt 模板监控页面 | ⏳ 待处理 | +| P13-14 | 流程模拟对话框 | ⏳ 待处理 | +| P13-15 | 话术流程监控页面 | ⏳ 待处理 | +| P13-16 | 流程执行记录详情 | ⏳ 待处理 | +| P13-17 | 护栏测试对话框 | ⏳ 待处理 | +| P13-18 | 输出护栏监控页面 | ⏳ 待处理 | +| P13-19 | 护栏拦截记录详情 | ⏳ 待处理 | +| P13-20 | 对话追踪列表页 | ⏳ 待处理 | +| P13-21 | 对话执行链路详情 | ⏳ 待处理 | +| P13-22 | 对话回放功能 | ⏳ 待处理 | +| P13-23 | 对话记录导出 | ⏳ 待处理 | +| P13-24 | 监控页面路由注册 | ⏳ 待处理 | +| P13-25 | 导航菜单更新 | ⏳ 待处理 | diff --git a/spec/ai-service/design.md b/spec/ai-service/design.md index f326773..89c3905 100644 --- a/spec/ai-service/design.md +++ b/spec/ai-service/design.md @@ -2,11 +2,12 @@ feature_id: "AISVC" title: "Python AI 中台(ai-service)技术设计" status: "draft" -version: "0.1.0" -last_updated: "2026-02-24" +version: "0.7.0" +last_updated: "2026-02-27" inputs: - "spec/ai-service/requirements.md" - "spec/ai-service/openapi.provider.yaml" + - "spec/ai-service/openapi.admin.yaml" - "java/openapi.deps.yaml" --- @@ -314,3 +315,1826 @@ MVP 提供 `VectorRetriever(Qdrant)`。 - 租户级 collection 数量增长:若租户数量巨大,Qdrant collection 管理成本上升;可在规模化阶段切换为“单 collection + payload tenant filter”并加强隔离校验。 - 上下文膨胀:仅截断可能影响长会话体验;后续可引入摘要记忆与检索式记忆。 - 置信度定义:MVP 先以规则/阈值实现,后续引入离线评测与校准。 + +--- + +## 10. 
v0.6.0 智能客服增强 — 总体架构升级 + +### 10.1 升级后的 Orchestrator 数据流 + +原有 8 步 pipeline 升级为 12 步,新增步骤用 `[NEW]` 标记: + +``` +API 层接收请求 → 提取 tenantId + body → 调用 Orchestrator + +Orchestrator: + 1) Memory.load(tenantId, sessionId) + 2) merge_context(local_history, external_history) + 3) [NEW] InputGuardrail.scan(currentMessage) → 前置禁词检测(仅记录,不阻断) + 4) [NEW] FlowEngine.check_active_flow(sessionId) → 检查是否有进行中的话术流程 + ├─ 有活跃流程 → FlowEngine.advance(user_input) → 返回话术内容 → 跳到步骤 11 + └─ 无活跃流程 → 继续步骤 5 + 5) [NEW] IntentRouter.match(currentMessage, tenantId) → 意图识别(关键词+正则) + ├─ fixed → 返回固定回复 → 跳到步骤 11 + ├─ flow → FlowEngine.start(flowId, sessionId) → 返回首步话术 → 跳到步骤 11 + ├─ transfer → shouldTransfer=true + 转人工话术 → 跳到步骤 11 + ├─ rag → 设置 target_kb_ids → 继续步骤 6 + └─ 未命中 → target_kb_ids=按优先级全部 → 继续步骤 6 + 6) [NEW] QueryRewriter.rewrite(currentMessage, history) → Query 改写(LLM 调用,解析指代词) + 7) Retrieval.retrieve(rewritten_query, tenantId, target_kb_ids) → 多知识库定向检索 + 8) [NEW] ResultRanker.rank(hits, kb_priorities) → 分层排序(按知识库类型优先级) + 9) [NEW] PromptBuilder.build(template, evidence, history, message) → 从数据库模板构建 Prompt + 10) LLM.generate(messages) 或 LLM.stream_generate(messages) + 11) [NEW] OutputGuardrail.filter(reply) → 后置禁词过滤(mask/replace/block) + 12) compute_confidence(retrieval_result) + 13) Memory.append(tenantId, sessionId, user + assistant messages) + 14) 返回 ChatResponse +``` + +### 10.2 新增模块与现有模块的关系 + +``` +app/ +├── services/ +│ ├── orchestrator.py # [修改] 升级为 12 步 pipeline +│ ├── prompt/ # [新增] Prompt 模板服务 +│ │ ├── template_service.py # 模板 CRUD + 版本管理 + 缓存 +│ │ └── variable_resolver.py # 变量替换引擎 +│ ├── intent/ # [新增] 意图识别与路由 +│ │ ├── router.py # IntentRouter:规则匹配引擎 +│ │ └── rule_service.py # 规则 CRUD +│ ├── flow/ # [新增] 话术流程引擎 +│ │ ├── engine.py # FlowEngine:状态机执行 +│ │ └── flow_service.py # 流程 CRUD +│ ├── guardrail/ # [新增] 输出护栏 +│ │ ├── input_scanner.py # 输入前置检测 +│ │ ├── output_filter.py # 输出后置过滤 +│ │ └── word_service.py # 禁词/行为规则 CRUD +│ ├── retrieval/ +│ │ ├── optimized_retriever.py # [修改] 支持 
target_kb_ids 参数 +│ │ ├── query_rewriter.py # [新增] Query 改写 +│ │ └── result_ranker.py # [新增] 分层排序 +│ ├── kb.py # [修改] 支持多知识库 CRUD +│ └── ...(现有模块不变) +├── api/ +│ └── admin/ +│ ├── prompt_templates.py # [新增] Prompt 模板管理 API +│ ├── intent_rules.py # [新增] 意图规则管理 API +│ ├── script_flows.py # [新增] 话术流程管理 API +│ ├── guardrails.py # [新增] 护栏管理 API(禁词+行为规则) +│ ├── kb.py # [修改] 新增知识库 CRUD 端点 +│ └── ...(现有 API 不变) +├── models/ +│ └── entities.py # [修改] 新增实体定义 +└── core/ + └── prompts.py # [修改] 改为从数据库加载,保留硬编码作为 fallback +``` + +--- + +## 11. Prompt 模板系统设计 + +### 11.1 数据模型 + +``` +prompt_templates 表 +├── id: UUID (PK) +├── tenant_id: VARCHAR (NOT NULL, FK) +├── name: VARCHAR (模板名称,如"默认客服人设") +├── scene: VARCHAR (场景标签:chat/rag_qa/greeting/farewell) +├── description: TEXT (模板描述) +├── is_default: BOOLEAN (是否为该场景的默认模板) +├── created_at: TIMESTAMP +├── updated_at: TIMESTAMP +└── INDEX: (tenant_id, scene) + +prompt_template_versions 表 +├── id: UUID (PK) +├── template_id: UUID (FK → prompt_templates.id) +├── version: INTEGER (自增版本号) +├── status: VARCHAR (draft/published/archived) +├── system_instruction: TEXT (系统指令内容,支持 {{variable}} 占位符) +├── variables: JSONB (变量定义列表,如 [{"name":"persona_name","default":"小N","description":"人设名称"}]) +├── created_at: TIMESTAMP +└── INDEX: (template_id, status) +└── UNIQUE: 同一 template_id 下仅一个 status=published +``` + +### 11.2 变量替换引擎 + +内置变量(系统自动注入,无需用户定义): + +| 变量 | 说明 | 示例值 | +|------|------|--------| +| `{{persona_name}}` | 人设名称 | 小N | +| `{{current_time}}` | 当前时间 | 2026-02-27 14:30 | +| `{{channel_type}}` | 渠道类型 | wechat | +| `{{tenant_name}}` | 租户名称 | 某某公司 | +| `{{session_id}}` | 会话ID | kf_001_wx123 | + +自定义变量:由模板定义,管理员在模板中声明变量名和默认值。 + +替换流程: +1. 加载已发布版本的 `system_instruction` +2. 合并内置变量 + 自定义变量默认值 +3. 执行 `{{variable}}` 模式替换 +4. 注入行为规则(从 guardrails 加载,追加到系统指令末尾) +5. 
输出最终 system message + +### 11.3 缓存策略 + +- 使用内存缓存(dict),key = `(tenant_id, scene)`,value = 已发布版本的完整模板 +- 发布/回滚操作时主动失效缓存 +- 缓存 TTL = 300s(兜底过期,防止分布式场景下缓存不一致) +- fallback:缓存未命中且数据库无模板时,使用现有硬编码的 `SYSTEM_PROMPT` 作为兜底 + +--- + +## 12. 多知识库设计 + +### 12.1 数据模型 + +``` +knowledge_bases 表(扩展现有 KnowledgeBase 实体) +├── id: VARCHAR (PK, 如 "kb_product_001") +├── tenant_id: VARCHAR (NOT NULL) +├── name: VARCHAR (知识库名称) +├── kb_type: VARCHAR (product/faq/script/policy/general) +├── description: TEXT +├── priority: INTEGER (优先级权重,数值越大越优先,默认 0) +├── is_enabled: BOOLEAN (默认 true) +├── doc_count: INTEGER (文档数量,冗余统计) +├── created_at: TIMESTAMP +├── updated_at: TIMESTAMP +└── INDEX: (tenant_id, kb_type) +``` + +### 12.2 Qdrant Collection 命名升级 + +现有:`kb_{tenant_id}`(单 collection) + +升级为:`kb_{tenant_id}_{kb_id}`(每个知识库独立 collection) + +兼容策略: +- 新创建的知识库使用新命名 +- 现有 `kb_{tenant_id}` collection 映射为 `kb_default` 知识库(自动迁移) +- 检索时如果 target_kb_ids 包含 `kb_default`,同时搜索新旧两种命名的 collection + +### 12.3 多知识库检索流程 + +``` +target_kb_ids(来自意图路由或默认全部) + → 按 kb_type 优先级排序:script > faq > product > policy > general + → 并行检索各 collection(使用现有 OptimizedRetriever) + → 合并结果,按 (kb_type_priority, score) 双维度排序 + → 截断到 maxEvidenceTokens + → 输出 ranked_hits +``` + +--- + +## 13. 
意图识别与规则引擎设计 + +### 13.1 数据模型 + +``` +intent_rules 表 +├── id: UUID (PK) +├── tenant_id: VARCHAR (NOT NULL) +├── name: VARCHAR (意图名称,如"退货意图") +├── keywords: JSONB (关键词列表,如 ["退货","退款","不想要了"]) +├── patterns: JSONB (正则模式列表,如 ["退.*货","怎么退"]) +├── priority: INTEGER (优先级,数值越大越先匹配) +├── response_type: VARCHAR (flow/rag/fixed/transfer) +├── target_kb_ids: JSONB (rag 类型时关联的知识库 ID 列表) +├── flow_id: UUID (flow 类型时关联的流程 ID) +├── fixed_reply: TEXT (fixed 类型时的固定回复内容) +├── transfer_message: TEXT (transfer 类型时的转人工话术) +├── is_enabled: BOOLEAN (默认 true) +├── hit_count: BIGINT (命中统计,默认 0) +├── created_at: TIMESTAMP +├── updated_at: TIMESTAMP +└── INDEX: (tenant_id, is_enabled, priority DESC) +``` + +### 13.2 匹配算法 + +```python +class IntentRouter: + def match(self, message: str, tenant_id: str) -> Optional[IntentMatchResult]: + rules = self._load_rules(tenant_id) # 按 priority DESC 排序,已缓存 + for rule in rules: + if not rule.is_enabled: + continue + # 1. 关键词匹配(任一命中即匹配) + for keyword in rule.keywords: + if keyword in message: + return IntentMatchResult(rule=rule, match_type="keyword", matched=keyword) + # 2. 正则匹配(任一命中即匹配) + for pattern in rule.patterns: + if re.search(pattern, message): + return IntentMatchResult(rule=rule, match_type="regex", matched=pattern) + return None # 未命中,走默认 RAG +``` + +### 13.3 缓存策略 + +- 规则列表按 `tenant_id` 缓存在内存中 +- 规则 CRUD 操作时主动失效缓存 +- 缓存 TTL = 60s + +--- + +## 14. 
话术流程引擎设计 + +### 14.1 数据模型 + +``` +script_flows 表 +├── id: UUID (PK) +├── tenant_id: VARCHAR (NOT NULL) +├── name: VARCHAR (流程名称) +├── description: TEXT +├── steps: JSONB (步骤列表,见下方结构) +├── is_enabled: BOOLEAN (默认 true) +├── created_at: TIMESTAMP +├── updated_at: TIMESTAMP +└── INDEX: (tenant_id) + +steps JSONB 结构: +[ + { + "step_no": 1, + "content": "您好,请问您的订单号是多少?", + "wait_input": true, + "timeout_seconds": 120, + "timeout_action": "repeat", // repeat/skip/transfer + "next_conditions": [ + {"keywords": ["不知道","忘了"], "goto_step": 3}, + {"pattern": "\\d{10,}", "goto_step": 2} + ], + "default_next": 2 // 无条件匹配时的下一步 + }, + ... +] + +flow_instances 表(运行时状态) +├── id: UUID (PK) +├── tenant_id: VARCHAR (NOT NULL) +├── session_id: VARCHAR (NOT NULL) +├── flow_id: UUID (FK → script_flows.id) +├── current_step: INTEGER (当前步骤序号) +├── status: VARCHAR (active/completed/timeout/cancelled) +├── context: JSONB (流程执行上下文,存储用户输入等) +├── started_at: TIMESTAMP +├── updated_at: TIMESTAMP +├── completed_at: TIMESTAMP (nullable) +└── UNIQUE: (tenant_id, session_id, status='active') -- 同一会话同时只有一个活跃流程 +``` + +### 14.2 状态机 + +``` + ┌─────────────┐ + │ IDLE │ (无活跃流程) + └──────┬──────┘ + │ 意图命中 flow 规则 + ▼ + ┌─────────────┐ + ┌────►│ ACTIVE │◄────┐ + │ └──────┬──────┘ │ + │ │ │ + │ 用户输入匹配条件 │ 用户输入不匹配 + │ │ │ → 重复当前步骤 + │ ▼ │ + │ 推进到下一步 ────────┘ + │ │ + │ 到达最后一步 + │ │ + │ ▼ + │ ┌─────────────┐ + │ │ COMPLETED │ + │ └─────────────┘ + │ + │ 超时 / 用户触发退出 + │ │ + │ ▼ + │ ┌─────────────┐ + └─────│ TIMEOUT / │ + │ CANCELLED │ + └─────────────┘ +``` + +### 14.3 FlowEngine 核心逻辑 + +```python +class FlowEngine: + async def check_active_flow(self, tenant_id: str, session_id: str) -> Optional[FlowInstance]: + """检查会话是否有活跃流程""" + return await self.repo.get_active_instance(tenant_id, session_id) + + async def start(self, flow_id: str, tenant_id: str, session_id: str) -> str: + """启动流程,返回第一步话术""" + flow = await self.repo.get_flow(flow_id) + instance = FlowInstance(flow_id=flow_id, session_id=session_id, 
current_step=1, status="active") + await self.repo.save_instance(instance) + return flow.steps[0]["content"] + + async def advance(self, instance: FlowInstance, user_input: str) -> FlowAdvanceResult: + """根据用户输入推进流程""" + flow = await self.repo.get_flow(instance.flow_id) + current = flow.steps[instance.current_step - 1] + + # 匹配下一步条件 + next_step = self._match_next(current, user_input) + + if next_step > len(flow.steps): + # 流程结束 + instance.status = "completed" + await self.repo.save_instance(instance) + return FlowAdvanceResult(completed=True, message=None) + + instance.current_step = next_step + await self.repo.save_instance(instance) + return FlowAdvanceResult(completed=False, message=flow.steps[next_step - 1]["content"]) +``` + +--- + +## 15. 输出护栏设计 + +### 15.1 数据模型 + +``` +forbidden_words 表 +├── id: UUID (PK) +├── tenant_id: VARCHAR (NOT NULL) +├── word: VARCHAR (禁词) +├── category: VARCHAR (competitor/sensitive/political/custom) +├── strategy: VARCHAR (mask/replace/block) +├── replacement: TEXT (replace 策略时的替换文本) +├── fallback_reply: TEXT (block 策略时的兜底话术) +├── is_enabled: BOOLEAN (默认 true) +├── hit_count: BIGINT (命中统计,默认 0) +├── created_at: TIMESTAMP +├── updated_at: TIMESTAMP +└── INDEX: (tenant_id, is_enabled) + +behavior_rules 表 +├── id: UUID (PK) +├── tenant_id: VARCHAR (NOT NULL) +├── rule_text: TEXT (行为约束描述,如"不允许承诺具体赔偿金额") +├── category: VARCHAR (compliance/tone/boundary/custom) +├── is_enabled: BOOLEAN (默认 true) +├── created_at: TIMESTAMP +├── updated_at: TIMESTAMP +└── INDEX: (tenant_id, is_enabled) +``` + +### 15.2 输出过滤流程 + +```python +class OutputGuardrail: + async def filter(self, reply: str, tenant_id: str) -> GuardrailResult: + words = self._load_words(tenant_id) # 已缓存 + triggered = [] + filtered_reply = reply + + for word in words: + if not word.is_enabled: + continue + if word.word in filtered_reply: + triggered.append(word) + if word.strategy == "block": + # 整条拦截,返回兜底话术 + return GuardrailResult( + reply=word.fallback_reply or "抱歉,让我换个方式回答您", + 
blocked=True, + triggered_words=[w.word for w in triggered] + ) + elif word.strategy == "mask": + filtered_reply = filtered_reply.replace(word.word, "*" * len(word.word)) + elif word.strategy == "replace": + filtered_reply = filtered_reply.replace(word.word, word.replacement) + + return GuardrailResult( + reply=filtered_reply, + blocked=False, + triggered_words=[w.word for w in triggered] + ) +``` + +### 15.3 Streaming 模式下的护栏处理 + +SSE 流式输出时,禁词过滤需要特殊处理: + +- 维护一个滑动窗口缓冲区(buffer),大小 = 最长禁词长度 +- 每次收到 LLM delta 时追加到 buffer +- 当 buffer 长度超过窗口大小时,对已确认安全的前缀执行输出 +- 在 `final` 事件前对剩余 buffer 做最终检查 +- `block` 策略在流式模式下:检测到禁词后立即停止输出,发送 `error` 事件并附带兜底话术 + +### 15.4 行为规则注入 + +行为规则不做运行时检测,而是注入到 Prompt 中作为 LLM 的行为约束: + +``` +[系统指令] +{模板内容} + +[行为约束 - 以下规则必须严格遵守] +1. 不允许承诺具体赔偿金额 +2. 不允许透露内部流程 +3. 不允许评价竞品 +... +``` + +--- + +## 16. 智能 RAG 增强设计 + +### 16.1 Query 改写 + +```python +class QueryRewriter: + REWRITE_PROMPT = """根据对话历史,改写用户的最新问题,使其语义完整、适合知识库检索。 +规则: +- 解析指代词("它"、"这个"等),替换为具体实体 +- 补全省略的主语或宾语 +- 保持原意,不添加额外信息 +- 如果问题已经足够清晰,直接返回原文 + +对话历史: +{history} + +用户最新问题:{query} + +改写后的检索查询:""" + + async def rewrite(self, query: str, history: list, llm_client: LLMClient) -> str: + if not history or len(history) < 2: + return query # 无历史或历史太短,不改写 + messages = [{"role": "user", "content": self.REWRITE_PROMPT.format( + history=self._format_history(history[-6:]), # 最近 3 轮 + query=query + )}] + result = await llm_client.generate(messages, max_tokens=100, temperature=0) + return result.content.strip() or query +``` + +### 16.2 分层排序 + +```python +KB_TYPE_PRIORITY = { + "script": 50, # 话术模板最高 + "faq": 40, # FAQ 次之 + "product": 30, # 产品知识 + "policy": 20, # 政策规范 + "general": 10, # 通用最低 +} + +class ResultRanker: + def rank(self, hits: list[RetrievalHit], kb_map: dict[str, KnowledgeBase]) -> list[RetrievalHit]: + """按 (kb_type_priority DESC, score DESC) 双维度排序""" + def sort_key(hit): + kb = kb_map.get(hit.kb_id) + type_priority = KB_TYPE_PRIORITY.get(kb.kb_type, 0) if kb else 0 + custom_priority = 
kb.priority if kb else 0 + return (-(type_priority + custom_priority), -hit.score) + return sorted(hits, key=sort_key) +``` + +--- + +## 17. 新增数据库实体汇总 + +v0.6.0 新增以下 SQLModel 实体(均包含 `tenant_id` 字段,遵循现有多租户隔离模式): + +| 实体 | 表名 | 用途 | +|------|------|------| +| PromptTemplate | prompt_templates | Prompt 模板主表 | +| PromptTemplateVersion | prompt_template_versions | 模板版本表 | +| KnowledgeBase(扩展) | knowledge_bases | 知识库主表(新增 kb_type/priority/is_enabled) | +| IntentRule | intent_rules | 意图规则表 | +| ScriptFlow | script_flows | 话术流程定义表 | +| FlowInstance | flow_instances | 流程运行实例表 | +| ForbiddenWord | forbidden_words | 禁词表 | +| BehaviorRule | behavior_rules | 行为规则表 | + +--- + +## 18. v0.6.0 风险与待澄清 + +- Query 改写的 LLM 调用会增加约 0.5-1s 延迟和额外 token 消耗;可通过配置开关控制是否启用。 +- 流式模式下的禁词滑动窗口可能导致输出延迟(等待 buffer 填满);需要在实时性和安全性之间权衡窗口大小。 +- 多知识库并行检索会增加 Qdrant 负载;需要评估并发 collection 搜索的性能影响。 +- 话术流程的超时检测依赖调用方(Java 侧)触发;需要与 Java 侧约定超时回调机制。 +- 现有 `kb_default` 到多知识库的数据迁移需要平滑过渡,不能中断现有服务。 + +--- + +## 19. v0.7.0 测试与监控系统设计 + +### 19.1 设计目标与范围 + +#### 19.1.1 核心目标 +- **可测试性**:为 v0.6.0 新增的四大功能(Prompt 模板、意图规则、话术流程、输出护栏)提供独立测试能力。 +- **可观测性**:提供细粒度的运行时监控数据,支持规则命中率、流程执行状态、护栏拦截统计等。 +- **可追溯性**:完整记录对话流程的 12 步执行细节,支持问题排查与效果评估。 +- **可导出性**:支持对话数据导出,便于离线分析与模型优化。 + +#### 19.1.2 设计约束 +- **性能优先**:监控数据采集不能显著影响对话生成性能(目标:<5% 延迟增加)。 +- **存储可控**:完整流程测试的详细日志仅保留 7 天,避免存储膨胀。 +- **租户隔离**:所有测试与监控数据必须按 `tenant_id` 隔离。 +- **向后兼容**:新增监控不影响现有 `/ai/chat` 接口的行为与性能。 + +--- + +### 19.2 总体架构 + +#### 19.2.1 监控数据流 + +``` +┌─────────────────────────────────────────────────────────────┐ +│ Admin API Layer │ +│ /admin/test/* /admin/monitoring/* /admin/dashboard/* │ +└────────────┬────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────┐ +│ Monitoring Service Layer │ +│ ├─ FlowTestService (完整流程测试) │ +│ ├─ IntentMonitor (意图规则监控) │ +│ ├─ PromptMonitor (Prompt 模板监控) │ +│ ├─ FlowMonitor (话术流程监控) │ +│ ├─ GuardrailMonitor (护栏监控) │ +│ └─ ConversationTracker (对话追踪) │ 
+└────────────┬────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────┐ +│ Orchestrator (增强) │ +│ 12-step pipeline + 监控埋点 (可选开关) │ +└────────────┬────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────┐ +│ Data Storage Layer │ +│ ├─ PostgreSQL (统计数据、对话记录) │ +│ └─ Redis (缓存、实时统计) │ +└─────────────────────────────────────────────────────────────┘ +``` + +#### 19.2.2 监控模式 + +**生产模式(默认)**: +- 仅记录关键指标(命中次数、错误率、平均延迟) +- 不记录详细的步骤执行日志 +- 性能影响 <2% + +**测试模式(显式开启)**: +- 记录完整的 12 步执行细节 +- 包含每步的输入/输出/耗时/错误 +- 仅用于 `/admin/test/*` 端点 +- 数据保留 7 天 + +--- + +### 19.3 Dashboard 统计增强设计 + +#### 19.3.1 新增统计指标(对应 AC-AISVC-91, AC-AISVC-92) + +在现有 `GET /admin/dashboard/stats` 响应中新增以下字段: + +```python +# 意图规则统计 +intentRuleHitRate: float # 命中率 = 命中次数 / 总对话次数 +intentRuleHitCount: int # 总命中次数 +intentRuleTopHits: list[dict] # Top 5 命中规则 [{"ruleId", "name", "hitCount"}] + +# Prompt 模板统计 +promptTemplateUsageCount: int # 模板使用总次数 +promptTemplateActiveCount: int # 已发布模板数量 +promptTemplateTopUsed: list[dict] # Top 5 使用模板 [{"templateId", "name", "usageCount"}] + +# 话术流程统计 +scriptFlowActivationCount: int # 流程激活总次数 +scriptFlowCompletionRate: float # 完成率 = 完成次数 / 激活次数 +scriptFlowTopActive: list[dict] # Top 5 活跃流程 [{"flowId", "name", "activationCount"}] + +# 护栏统计 +guardrailBlockCount: int # 拦截总次数 +guardrailBlockRate: float # 拦截率 = 拦截次数 / 总对话次数 +guardrailTopWords: list[dict] # Top 5 触发禁词 [{"word", "category", "hitCount"}] +``` + +#### 19.3.2 数据来源与计算 + +**实时统计(Redis)**: +- 使用 Redis Hash 存储租户级计数器 +- Key 格式:`stats:{tenant_id}:{metric}:{date}` +- 每次对话结束时异步更新(不阻塞响应) +- TTL = 90 天 + +**聚合统计(PostgreSQL)**: +- 从现有表的 `hit_count` 字段聚合(intent_rules, forbidden_words) +- 从 `flow_instances` 表统计流程激活与完成 +- 从 `chat_messages` 表关联 `prompt_template_id`(需新增字段) + +#### 19.3.3 性能优化 + +- Dashboard 统计结果缓存 60 秒(Redis) +- Top N 排行榜每 5 分钟预计算一次(后台任务) +- 避免实时聚合大表,使用增量计数器 + +--- + +### 19.4 完整流程测试台设计 + +#### 
19.4.1 测试接口(对应 AC-AISVC-93 ~ AC-AISVC-96) + +**端点**:`POST /admin/test/flow-execution` + +**请求体**: +```json +{ + "message": "我想退货", + "sessionId": "test_session_001", + "channelType": "wechat", + "history": [...], // 可选 + "metadata": {...}, // 可选 + "enableDetailedLog": true // 是否记录详细日志 +} +``` + +**响应体**: +```json +{ + "executionId": "exec_uuid", + "steps": [ + { + "step": 1, + "name": "Memory.load", + "status": "success", + "durationMs": 12, + "input": {"sessionId": "test_session_001"}, + "output": {"messageCount": 5}, + "error": null, + "metadata": {} + }, + { + "step": 3, + "name": "InputGuardrail.scan", + "status": "success", + "durationMs": 8, + "input": {"message": "我想退货"}, + "output": {"triggered": false, "words": []}, + "error": null, + "metadata": {} + }, + { + "step": 5, + "name": "IntentRouter.match", + "status": "success", + "durationMs": 15, + "input": {"message": "我想退货"}, + "output": { + "matched": true, + "ruleId": "rule_001", + "ruleName": "退货意图", + "responseType": "flow", + "flowId": "flow_return_001" + }, + "error": null, + "metadata": {"priority": 100, "matchType": "keyword"} + }, + // ... 
其他步骤 + ], + "finalResponse": { + "reply": "您好,请问您的订单号是多少?", + "confidence": 0.95, + "shouldTransfer": false + }, + "totalDurationMs": 1250, + "summary": { + "successSteps": 11, + "failedSteps": 0, + "skippedSteps": 1 + } +} +``` + +#### 19.4.2 实现策略 + +**Orchestrator 增强**: +```python +class Orchestrator: + async def generate_with_monitoring( + self, + request: ChatRequest, + tenant_id: str, + enable_detailed_log: bool = False + ) -> tuple[ChatResponse, Optional[list[StepLog]]]: + """ + 增强版生成方法,支持可选的详细日志记录 + """ + step_logs = [] if enable_detailed_log else None + + # Step 1: Memory.load + step_start = time.time() + try: + history = await self.memory.load_history(tenant_id, request.sessionId) + if step_logs is not None: + step_logs.append(StepLog( + step=1, + name="Memory.load", + status="success", + durationMs=int((time.time() - step_start) * 1000), + input={"sessionId": request.sessionId}, + output={"messageCount": len(history)} + )) + except Exception as e: + if step_logs is not None: + step_logs.append(StepLog( + step=1, + name="Memory.load", + status="failed", + durationMs=int((time.time() - step_start) * 1000), + error=str(e) + )) + raise + + # ... 
其他步骤类似 + + return response, step_logs +``` + +**测试端点实现**: +```python +@router.post("/admin/test/flow-execution") +async def test_flow_execution( + request: FlowTestRequest, + tenant_id: str = Depends(get_current_tenant_id), + session: AsyncSession = Depends(get_session) +): + # 调用增强版 Orchestrator + response, step_logs = await orchestrator.generate_with_monitoring( + ChatRequest( + message=request.message, + sessionId=request.sessionId, + channelType=request.channelType, + history=request.history, + metadata=request.metadata + ), + tenant_id=tenant_id, + enable_detailed_log=request.enableDetailedLog + ) + + # 保存测试记录(可选,用于历史查询) + if request.enableDetailedLog: + test_record = FlowTestRecord( + tenant_id=tenant_id, + session_id=request.sessionId, + steps=step_logs, + final_response=response, + created_at=datetime.utcnow() + ) + await save_test_record(session, test_record) + + return FlowExecutionResult( + executionId=str(uuid.uuid4()), + steps=step_logs or [], + finalResponse=response, + totalDurationMs=sum(s.durationMs for s in (step_logs or [])), + summary={ + "successSteps": sum(1 for s in (step_logs or []) if s.status == "success"), + "failedSteps": sum(1 for s in (step_logs or []) if s.status == "failed"), + "skippedSteps": sum(1 for s in (step_logs or []) if s.status == "skipped") + } + ) +``` + +--- + +### 19.5 意图规则测试与监控设计 + +#### 19.5.1 独立测试接口(对应 AC-AISVC-97 ~ AC-AISVC-99) + +**端点**:`POST /admin/intent-rules/{ruleId}/test` + +**请求体**: +```json +{ + "testMessages": [ + "我想退货", + "能退款吗", + "这个产品怎么样" + ] +} +``` + +**响应体**: +```json +{ + "ruleId": "rule_001", + "ruleName": "退货意图", + "results": [ + { + "message": "我想退货", + "matched": true, + "matchedKeywords": ["退货"], + "matchedPatterns": [], + "matchType": "keyword", + "priority": 100, + "conflictRules": [] + }, + { + "message": "能退款吗", + "matched": true, + "matchedKeywords": ["退款"], + "matchedPatterns": [], + "matchType": "keyword", + "priority": 100, + "conflictRules": [] + }, + { + "message": "这个产品怎么样", + "matched": false, + "matchedKeywords": [],
+ "matchedPatterns": [], + "matchType": null, + "priority": 100, + "conflictRules": [ + { + "ruleId": "rule_002", + "ruleName": "产品咨询", + "priority": 80, + "reason": "可能匹配产品咨询规则" + } + ] + } + ], + "summary": { + "totalTests": 3, + "matchedCount": 2, + "matchRate": 0.67 + } +} +``` + +#### 19.5.2 冲突检测算法 + +```python +class IntentRuleTester: + async def test_rule( + self, + rule: IntentRule, + test_messages: list[str], + tenant_id: str + ) -> IntentRuleTestResult: + """测试意图规则并检测冲突""" + all_rules = await self.rule_service.get_rules(tenant_id) + results = [] + + for message in test_messages: + # 测试当前规则 + matched = self._match_rule(rule, message) + + # 检测冲突:查找其他也能匹配的规则 + conflict_rules = [] + for other_rule in all_rules: + if other_rule.id == rule.id: + continue + if self._match_rule(other_rule, message): + conflict_rules.append({ + "ruleId": other_rule.id, + "ruleName": other_rule.name, + "priority": other_rule.priority, + "reason": f"同时匹配(优先级:{other_rule.priority})" + }) + + results.append(IntentRuleTestCase( + message=message, + matched=matched, + conflictRules=conflict_rules + )) + + return IntentRuleTestResult( + ruleId=rule.id, + ruleName=rule.name, + results=results, + summary={ + "totalTests": len(test_messages), + "matchedCount": sum(1 for r in results if r.matched), + "matchRate": sum(1 for r in results if r.matched) / len(test_messages) + } + ) +``` + +#### 19.5.3 监控统计接口(对应 AC-AISVC-100) + +**端点**:`GET /admin/monitoring/intent-rules` + +**查询参数**: +- `startDate`: 开始日期(ISO 8601) +- `endDate`: 结束日期(ISO 8601) +- `limit`: 返回数量(默认 10) + +**响应体**: +```json +{ + "totalHits": 1250, + "totalConversations": 5000, + "hitRate": 0.25, + "rules": [ + { + "ruleId": "rule_001", + "ruleName": "退货意图", + "hitCount": 450, + "hitRate": 0.09, + "responseType": "flow", + "avgResponseTime": 1200, + "lastHitAt": "2026-02-27T14:30:00Z" + } + ], + "timeSeriesData": [ + { + "date": "2026-02-27", + "totalHits": 120, + "ruleBreakdown": { + "rule_001": 45, + "rule_002": 30, + "rule_003": 
45 + } + } + ] +} +``` + +--- + +### 19.6 Prompt 模板测试与监控设计 + +#### 19.6.1 模板预览接口(对应 AC-AISVC-101) + +**端点**:`POST /admin/prompt-templates/{templateId}/preview` + +**请求体**: +```json +{ + "variables": { + "persona_name": "小助手", + "custom_var": "测试值" + }, + "sampleHistory": [ + {"role": "user", "content": "你好"}, + {"role": "assistant", "content": "您好,有什么可以帮您?"} + ], + "sampleMessage": "我想了解产品信息" +} +``` + +**响应体**: +```json +{ + "templateId": "tpl_001", + "templateName": "默认客服人设", + "version": 3, + "renderedSystemPrompt": "你是小助手,一个专业的客服助手...\n\n[行为约束]\n1. 不允许承诺具体赔偿金额\n...", + "finalMessages": [ + { + "role": "system", + "content": "你是小助手,一个专业的客服助手..." + }, + { + "role": "user", + "content": "你好" + }, + { + "role": "assistant", + "content": "您好,有什么可以帮您?" + }, + { + "role": "user", + "content": "我想了解产品信息" + } + ], + "tokenCount": { + "systemPrompt": 450, + "history": 120, + "currentMessage": 30, + "total": 600 + }, + "warnings": [] +} +``` + +#### 19.6.2 模板使用统计(对应 AC-AISVC-102) + +**端点**:`GET /admin/monitoring/prompt-templates` + +**响应体**: +```json +{ + "totalUsage": 5000, + "templates": [ + { + "templateId": "tpl_001", + "templateName": "默认客服人设", + "scene": "chat", + "usageCount": 3500, + "usageRate": 0.70, + "currentVersion": 3, + "avgTokenCount": 450, + "lastUsedAt": "2026-02-27T14:30:00Z" + } + ], + "sceneBreakdown": { + "chat": 3500, + "rag_qa": 1200, + "greeting": 300 + } +} +``` + +#### 19.6.3 实现策略 + +**Token 计数**: +```python +import tiktoken + +class PromptTemplateMonitor: + def __init__(self): + self.tokenizer = tiktoken.get_encoding("cl100k_base") + + async def preview_template( + self, + template: PromptTemplate, + variables: dict, + sample_history: list[dict], + sample_message: str + ) -> PromptPreviewResult: + """预览模板渲染结果并计算 token""" + # 1. 渲染系统指令 + version = await self.get_published_version(template.id) + system_prompt = self.variable_resolver.resolve( + version.system_instruction, + variables + ) + + # 2. 
注入行为规则 + behavior_rules = await self.get_behavior_rules(template.tenant_id) + if behavior_rules: + system_prompt += "\n\n[行为约束]\n" + "\n".join( + f"{i+1}. {rule.rule_text}" + for i, rule in enumerate(behavior_rules) + ) + + # 3. 构建完整消息列表 + messages = [{"role": "system", "content": system_prompt}] + messages.extend(sample_history) + messages.append({"role": "user", "content": sample_message}) + + # 4. 计算 token + token_counts = { + "systemPrompt": len(self.tokenizer.encode(system_prompt)), + "history": sum( + len(self.tokenizer.encode(msg["content"])) + for msg in sample_history + ), + "currentMessage": len(self.tokenizer.encode(sample_message)), + } + token_counts["total"] = sum(token_counts.values()) + + # 5. 检查警告 + warnings = [] + if token_counts["total"] > 4000: + warnings.append("总 token 数超过 4000,可能影响性能") + if token_counts["systemPrompt"] > 2000: + warnings.append("系统指令过长,建议精简") + + return PromptPreviewResult( + templateId=template.id, + renderedSystemPrompt=system_prompt, + finalMessages=messages, + tokenCount=token_counts, + warnings=warnings + ) +``` + +--- + +### 19.7 话术流程测试与监控设计 + +#### 19.7.1 流程模拟测试接口(对应 AC-AISVC-103 ~ AC-AISVC-105) + +**端点**:`POST /admin/script-flows/{flowId}/simulate` + +**请求体**: +```json +{ + "userInputs": [ + "12345678901234", + "质量问题", + "是的" + ] +} +``` + +**响应体**: +```json +{ + "flowId": "flow_001", + "flowName": "退货流程", + "simulation": [ + { + "stepNo": 1, + "botMessage": "您好,请问您的订单号是多少?", + "userInput": "12345678901234", + "matchedCondition": { + "type": "pattern", + "pattern": "\\d{10,}", + "gotoStep": 2 + }, + "nextStep": 2, + "durationMs": 50 + }, + { + "stepNo": 2, + "botMessage": "请问退货原因是什么?", + "userInput": "质量问题", + "matchedCondition": { + "type": "default", + "gotoStep": 3 + }, + "nextStep": 3, + "durationMs": 45 + }, + { + "stepNo": 3, + "botMessage": "已为您登记退货申请,是否需要上门取件?", + "userInput": "是的", + "matchedCondition": { + "type": "keyword", + "keywords": ["是", "需要"], + "gotoStep": 4 + }, + "nextStep": 4, + "durationMs": 40 
+ } + ], + "result": { + "completed": true, + "totalSteps": 3, + "totalDurationMs": 135, + "finalMessage": "好的,我们会在 24 小时内安排快递上门取件。" + }, + "coverage": { + "totalSteps": 5, + "coveredSteps": 3, + "coverageRate": 0.60, + "uncoveredSteps": [4, 5] + }, + "issues": [ + "流程覆盖率低于 80%,建议增加测试用例" + ] +} +``` + +#### 19.7.2 流程覆盖率分析 + +```python +class ScriptFlowTester: + async def simulate_flow( + self, + flow: ScriptFlow, + user_inputs: list[str] + ) -> FlowSimulationResult: + """模拟流程执行并分析覆盖率""" + simulation = [] + current_step = 1 + visited_steps = set() + + for user_input in user_inputs: + if current_step > len(flow.steps): + break + + step_def = flow.steps[current_step - 1] + visited_steps.add(current_step) + + # 匹配下一步条件 + matched_condition, next_step = self._match_next_step( + step_def, + user_input + ) + + simulation.append(FlowSimulationStep( + stepNo=current_step, + botMessage=step_def["content"], + userInput=user_input, + matchedCondition=matched_condition, + nextStep=next_step + )) + + current_step = next_step + + # 分析覆盖率 + total_steps = len(flow.steps) + covered_steps = len(visited_steps) + coverage_rate = (covered_steps / total_steps) if total_steps else 0.0 + + # 检测问题 + issues = [] + if coverage_rate < 0.8: + issues.append("流程覆盖率低于 80%,建议增加测试用例") + + # 检测死循环 + if len(simulation) > total_steps * 2: + issues.append("检测到可能的死循环") + + # 检测未覆盖的分支 + uncovered_steps = set(range(1, total_steps + 1)) - visited_steps + if uncovered_steps: + issues.append(f"未覆盖步骤:{sorted(uncovered_steps)}") + + return FlowSimulationResult( + flowId=flow.id, + simulation=simulation, + coverage={ + "totalSteps": total_steps, + "coveredSteps": covered_steps, + "coverageRate": coverage_rate, + "uncoveredSteps": sorted(uncovered_steps) + }, + issues=issues + ) +``` + +#### 19.7.3 流程监控统计(对应 AC-AISVC-106) + +**端点**:`GET /admin/monitoring/script-flows` + +**响应体**: +```json +{ + "totalActivations": 850, + "totalCompletions": 680, + "completionRate": 0.80, + "flows": [ + { + "flowId": "flow_001", + "flowName": "退货流程", +
"activationCount": 450, + "completionCount": 380, + "completionRate": 0.84, + "avgDuration": 180, + "avgStepsCompleted": 4.2, + "dropOffPoints": [ + { + "stepNo": 2, + "dropOffCount": 50, + "dropOffRate": 0.11 + } + ], + "lastActivatedAt": "2026-02-27T14:30:00Z" + } + ] +} +``` + +--- + +### 19.8 输出护栏测试与监控设计 + +#### 19.8.1 禁词测试接口(对应 AC-AISVC-107) + +**端点**:`POST /admin/guardrails/forbidden-words/test` + +**请求体**: +```json +{ + "testTexts": [ + "我们的产品比竞品 A 更好", + "可以给您赔偿 1000 元", + "这是正常的回复" + ] +} +``` + +**响应体**: +```json +{ + "results": [ + { + "originalText": "我们的产品比竞品 A 更好", + "triggered": true, + "triggeredWords": [ + { + "word": "竞品 A", + "category": "competitor", + "strategy": "replace", + "replacement": "其他品牌" + } + ], + "filteredText": "我们的产品比其他品牌更好", + "blocked": false + }, + { + "originalText": "可以给您赔偿 1000 元", + "triggered": true, + "triggeredWords": [ + { + "word": "赔偿", + "category": "sensitive", + "strategy": "block", + "fallbackReply": "关于补偿问题,请联系人工客服处理" + } + ], + "filteredText": "关于补偿问题,请联系人工客服处理", + "blocked": true + }, + { + "originalText": "这是正常的回复", + "triggered": false, + "triggeredWords": [], + "filteredText": "这是正常的回复", + "blocked": false + } + ], + "summary": { + "totalTests": 3, + "triggeredCount": 2, + "blockedCount": 1, + "triggerRate": 0.67 + } +} +``` + +#### 19.8.2 护栏监控统计(对应 AC-AISVC-108) + +**端点**:`GET /admin/monitoring/guardrails` + +**响应体**: +```json +{ + "totalBlocks": 120, + "totalTriggers": 450, + "blockRate": 0.024, + "words": [ + { + "wordId": "word_001", + "word": "竞品 A", + "category": "competitor", + "strategy": "replace", + "hitCount": 85, + "blockCount": 0, + "lastHitAt": "2026-02-27T14:30:00Z" + }, + { + "wordId": "word_002", + "word": "赔偿", + "category": "sensitive", + "strategy": "block", + "hitCount": 45, + "blockCount": 45, + "lastHitAt": "2026-02-27T14:25:00Z" + } + ], + "categoryBreakdown": { + "competitor": 85, + "sensitive": 45, + "political": 0, + "custom": 20 + } +} +``` + +--- + +### 19.9 对话追踪与导出设计 + +#### 
19.9.1 对话追踪接口(对应 AC-AISVC-109) + +**端点**:`GET /admin/monitoring/conversations` + +**查询参数**: +- `startDate`: 开始日期(ISO 8601) +- `endDate`: 结束日期(ISO 8601) +- `sessionId`: 会话 ID(可选) +- `channelType`: 渠道类型(可选) +- `hasError`: 是否包含错误(可选) +- `limit`: 返回数量(默认 20) +- `offset`: 偏移量(默认 0) + +**响应体**: +```json +{ + "total": 1250, + "conversations": [ + { + "sessionId": "kf_001_wx123", + "channelType": "wechat", + "messageCount": 12, + "startTime": "2026-02-27T14:00:00Z", + "lastMessageTime": "2026-02-27T14:15:00Z", + "duration": 900, + "intentRulesHit": [ + {"ruleId": "rule_001", "ruleName": "退货意图", "hitCount": 2} + ], + "flowsActivated": [ + {"flowId": "flow_001", "flowName": "退货流程", "status": "completed"} + ], + "guardrailTriggered": false, + "errorCount": 0, + "avgConfidence": 0.85, + "transferRequested": false + } + ] +} +``` + +#### 19.9.2 对话详情接口 + +**端点**:`GET /admin/monitoring/conversations/{sessionId}` + +**响应体**: +```json +{ + "sessionId": "kf_001_wx123", + "channelType": "wechat", + "startTime": "2026-02-27T14:00:00Z", + "messages": [ + { + "messageId": "msg_001", + "role": "user", + "content": "我想退货", + "timestamp": "2026-02-27T14:00:00Z" + }, + { + "messageId": "msg_002", + "role": "assistant", + "content": "您好,请问您的订单号是多少?", + "timestamp": "2026-02-27T14:00:02Z", + "confidence": 0.95, + "intentMatched": { + "ruleId": "rule_001", + "ruleName": "退货意图", + "responseType": "flow" + }, + "flowActivated": { + "flowId": "flow_001", + "flowName": "退货流程", + "currentStep": 1 + }, + "guardrailResult": { + "triggered": false, + "words": [] + }, + "latencyMs": 1200, + "totalTokens": 450, + "promptTokens": 380, + "completionTokens": 70 + } + ], + "summary": { + "totalMessages": 12, + "userMessages": 6, + "assistantMessages": 6, + "avgConfidence": 0.85, + "avgLatency": 1150, + "totalTokens": 5400, + "intentRulesHit": 2, + "flowsActivated": 1, + "guardrailTriggered": false, + "errorOccurred": false + } +} +``` + +#### 19.9.3 对话导出接口(对应 AC-AISVC-110) + +**端点**:`POST
/admin/monitoring/conversations/export` + +**请求体**: +```json +{ + "startDate": "2026-02-20T00:00:00Z", + "endDate": "2026-02-27T23:59:59Z", + "format": "csv", + "filters": { + "channelType": "wechat", + "hasError": false, + "minConfidence": 0.7 + }, + "fields": [ + "sessionId", + "channelType", + "messageCount", + "avgConfidence", + "intentRulesHit", + "flowsActivated" + ] +} +``` + +**响应体**: +```json +{ + "exportId": "export_uuid", + "status": "processing", + "estimatedRows": 1250, + "downloadUrl": null, + "expiresAt": null +} +``` + +**导出状态查询**:`GET /admin/monitoring/conversations/export/{exportId}` + +**响应体**: +```json +{ + "exportId": "export_uuid", + "status": "completed", + "totalRows": 1250, + "downloadUrl": "/admin/monitoring/conversations/export/export_uuid/download", + "expiresAt": "2026-02-28T14:30:00Z", + "createdAt": "2026-02-27T14:25:00Z" +} +``` + +#### 19.9.4 实现策略 + +**异步导出处理**: +```python +import asyncio +import csv +from io import StringIO + +class ConversationExporter: + async def export_conversations( + self, + tenant_id: str, + filters: dict, + fields: list[str], + format: str = "csv" + ) -> str: + """异步导出对话数据""" + export_id = str(uuid.uuid4()) + + # 创建导出任务记录 + export_task = ExportTask( + id=export_id, + tenant_id=tenant_id, + status="processing", + created_at=datetime.utcnow() + ) + await self.save_export_task(export_task) + + # 异步执行导出 + asyncio.create_task(self._process_export( + export_id, + tenant_id, + filters, + fields, + format + )) + + return export_id + + async def _process_export( + self, + export_id: str, + tenant_id: str, + filters: dict, + fields: list[str], + format: str + ): + """后台处理导出任务""" + try: + # 1. 
查询对话数据(分批处理,避免内存溢出) + batch_size = 1000 + offset = 0 + output = StringIO() + writer = csv.DictWriter(output, fieldnames=fields) + writer.writeheader() + + while True: + conversations = await self.query_conversations( + tenant_id, + filters, + limit=batch_size, + offset=offset + ) + + if not conversations: + break + + for conv in conversations: + row = {field: conv.get(field) for field in fields} + writer.writerow(row) + + offset += len(conversations) + + # 2. 保存到临时文件 + file_path = f"/tmp/exports/{export_id}.csv" + with open(file_path, "w", encoding="utf-8") as f: + f.write(output.getvalue()) + + # 3. 更新导出任务状态 + await self.update_export_task( + export_id, + status="completed", + file_path=file_path, + total_rows=offset, + expires_at=datetime.utcnow() + timedelta(hours=24) + ) + + except Exception as e: + logger.error(f"Export failed: {e}") + await self.update_export_task( + export_id, + status="failed", + error=str(e) + ) +``` + +--- + +### 19.10 新增数据库实体汇总 + +v0.7.0 新增以下 SQLModel 实体(均包含 `tenant_id` 字段,遵循现有多租户隔离模式): + +| 实体 | 表名 | 用途 | 关键字段 | +|------|------|------|----------| +| FlowTestRecord | flow_test_records | 完整流程测试记录 | session_id, steps (JSONB), final_response (JSONB) | +| ExportTask | export_tasks | 对话导出任务 | status, file_path, total_rows, expires_at | + +**扩展现有实体**: + +| 实体 | 表名 | 新增字段 | 用途 | +|------|------|----------|------| +| ChatMessage | chat_messages | prompt_template_id (UUID, nullable) | 关联使用的 Prompt 模板 | +| ChatMessage | chat_messages | intent_rule_id (UUID, nullable) | 关联命中的意图规则 | +| ChatMessage | chat_messages | flow_instance_id (UUID, nullable) | 关联的话术流程实例 | +| ChatMessage | chat_messages | guardrail_triggered (BOOLEAN) | 是否触发护栏 | +| ChatMessage | chat_messages | guardrail_words (JSONB, nullable) | 触发的禁词列表 | + +**索引优化**: +```sql +-- 监控查询优化 +CREATE INDEX idx_chat_messages_monitoring +ON chat_messages(tenant_id, created_at DESC, role); + +-- 意图规则统计优化 +CREATE INDEX idx_chat_messages_intent +ON chat_messages(tenant_id, intent_rule_id) +WHERE intent_rule_id
IS NOT NULL; + +-- 流程统计优化 +CREATE INDEX idx_flow_instances_monitoring +ON flow_instances(tenant_id, status, started_at DESC); + +-- 护栏统计优化 +CREATE INDEX idx_chat_messages_guardrail +ON chat_messages(tenant_id, guardrail_triggered, created_at DESC) +WHERE guardrail_triggered = true; +``` + +--- + +### 19.11 性能优化策略 + +#### 19.11.1 缓存策略 + +**Redis 缓存层次**: +``` +Level 1: Dashboard 统计(TTL 60s) + - Key: stats:{tenant_id}:dashboard + - 内容:聚合统计数据 + +Level 2: Top N 排行榜(TTL 300s) + - Key: stats:{tenant_id}:top:intent_rules + - Key: stats:{tenant_id}:top:prompt_templates + - Key: stats:{tenant_id}:top:script_flows + - Key: stats:{tenant_id}:top:forbidden_words + +Level 3: 实时计数器(TTL 90天) + - Key: stats:{tenant_id}:counter:{metric}:{date} + - 内容:增量计数器 +``` + +**缓存更新策略**: +- Dashboard 统计:每次对话结束后异步更新计数器 +- Top N 排行榜:后台任务每 5 分钟重新计算 +- 实时计数器:使用 Redis INCR 原子操作 + +#### 19.11.2 数据库优化 + +**分区策略**: +```sql +-- flow_test_records 按日期分区(保留 7 天) +CREATE TABLE flow_test_records ( + id UUID PRIMARY KEY, + tenant_id VARCHAR NOT NULL, + created_at TIMESTAMP NOT NULL, + ... 
+) PARTITION BY RANGE (created_at); + +CREATE TABLE flow_test_records_2026_02_27 +PARTITION OF flow_test_records +FOR VALUES FROM ('2026-02-27') TO ('2026-02-28'); + +-- 自动清理过期分区(定时任务) +DROP TABLE IF EXISTS flow_test_records_2026_02_20; +``` + +**查询优化**: +- 使用覆盖索引减少回表查询 +- 对大表使用 LIMIT + 游标分页 +- 避免 SELECT *,只查询需要的字段 +- 使用 EXPLAIN ANALYZE 分析慢查询 + +#### 19.11.3 监控埋点优化 + +**最小化性能影响**: +```python +class MonitoringMiddleware: + async def __call__(self, request, call_next): + # 仅在测试模式或采样时记录详细日志 + enable_detailed_log = ( + request.url.path.startswith("/admin/test/") or + self._should_sample() # 1% 采样率 + ) + + if enable_detailed_log: + # 记录详细步骤日志 + request.state.monitoring_enabled = True + + response = await call_next(request) + + # 异步更新统计(不阻塞响应) + if hasattr(request.state, "monitoring_data"): + asyncio.create_task( + self._update_stats_async(request.state.monitoring_data) + ) + + return response + + def _should_sample(self) -> bool: + """1% 采样率""" + return random.random() < 0.01 +``` + +--- + +### 19.12 v0.7.0 风险与待澄清 + +#### 19.12.1 性能风险 + +- **完整流程测试**:记录 12 步详细日志会增加 10-15% 的延迟,仅用于测试环境。 +- **对话导出**:大批量导出(>10000 条)可能导致内存压力,需要流式处理。 +- **实时统计**:高并发场景下 Redis 计数器可能成为瓶颈,考虑使用 Redis Cluster。 + +#### 19.12.2 存储风险 + +- **测试日志膨胀**:完整流程测试日志每条约 5KB,需严格执行 7 天清理策略。 +- **导出文件管理**:导出文件需要定期清理(24 小时过期),避免磁盘占用。 +- **索引膨胀**:新增多个索引会增加写入开销,需监控索引使用率。 + +#### 19.12.3 功能待澄清 + +- **对话导出格式**:是否需要支持 JSON/Excel 格式?当前仅实现 CSV。 +- **实时监控推送**:是否需要 WebSocket 实时推送监控数据?当前仅支持轮询。 +- **历史数据迁移**:现有对话数据是否需要回填 `prompt_template_id` 等新字段? +- **权限控制**:测试与监控接口是否需要更细粒度的权限控制(如只读/读写)? + +#### 19.12.4 兼容性风险 + +- **数据库迁移**:新增字段和索引需要在生产环境谨慎执行,建议分批迁移。 +- **API 版本**:新增监控接口不影响现有 `/ai/chat` 接口,向后兼容。 +- **前端适配**:Dashboard 新增统计字段需要前端同步更新,否则显示为空。 + +--- + +## 20. 
总结 + +v0.7.0 测试与监控系统为 AI 中台提供了完整的可观测性与可测试性能力: + +**核心价值**: +- **独立测试**:为四大功能(Prompt 模板、意图规则、话术流程、输出护栏)提供独立测试能力 +- **完整追踪**:12 步流程的详细执行日志,支持问题排查与效果评估 +- **实时监控**:细粒度的运行时统计,支持规则命中率、流程完成率、护栏拦截率等 +- **数据导出**:支持对话数据导出,便于离线分析与模型优化 + +**技术亮点**: +- **性能优先**:生产模式性能影响 <2%,测试模式仅在显式开启时生效 +- **存储可控**:测试日志 7 天自动清理,导出文件 24 小时过期 +- **租户隔离**:所有监控数据按 `tenant_id` 隔离,保证多租户安全 +- **向后兼容**:新增监控不影响现有接口行为与性能 + +**实施建议**: +1. 优先实现 Dashboard 统计增强(AC-AISVC-91, AC-AISVC-92) +2. 其次实现完整流程测试台(AC-AISVC-93 ~ AC-AISVC-96) +3. 再实现各功能的独立测试接口(AC-AISVC-97 ~ AC-AISVC-108) +4. 最后实现对话追踪与导出(AC-AISVC-109, AC-AISVC-110) diff --git a/spec/ai-service/openapi.admin.yaml b/spec/ai-service/openapi.admin.yaml index 3fbf5d0..b44df87 100644 --- a/spec/ai-service/openapi.admin.yaml +++ b/spec/ai-service/openapi.admin.yaml @@ -2,8 +2,8 @@ openapi: 3.1.0 info: title: "AI Service Admin API" description: "AI 中台管理类接口契约(Provider: ai-service),支持 ai-service-admin 模块进行知识库、Prompt 及 RAG 调试管理。" - version: "0.2.0" - x-contract-level: L1 # 已实现级别,接口已真实对接 + version: "0.7.0" + x-contract-level: L1 components: parameters: XTenantId: @@ -79,6 +79,743 @@ components: totalPages: type: integer description: "总页数" + # v0.6.0 新增 schemas + KnowledgeBaseInfo: + type: object + properties: + id: + type: string + description: "知识库ID" + name: + type: string + description: "知识库名称" + kbType: + type: string + description: "知识库类型" + enum: [product, faq, script, policy, general] + description: + type: string + description: "描述" + priority: + type: integer + description: "优先级权重" + isEnabled: + type: boolean + description: "是否启用" + docCount: + type: integer + description: "文档数量" + createdAt: + type: string + format: date-time + updatedAt: + type: string + format: date-time + KnowledgeBaseCreate: + type: object + required: [name, kbType] + properties: + name: + type: string + kbType: + type: string + enum: [product, faq, script, policy, general] + description: + type: string + priority: + type: integer + default: 0 + KnowledgeBaseUpdate: + type: object + 
properties: + name: + type: string + kbType: + type: string + enum: [product, faq, script, policy, general] + description: + type: string + priority: + type: integer + isEnabled: + type: boolean + PromptTemplateInfo: + type: object + properties: + id: + type: string + name: + type: string + description: "模板名称" + scene: + type: string + description: "场景标签" + description: + type: string + isDefault: + type: boolean + publishedVersion: + type: integer + description: "当前发布版本号" + nullable: true + createdAt: + type: string + format: date-time + updatedAt: + type: string + format: date-time + PromptTemplateCreate: + type: object + required: [name, scene, systemInstruction] + properties: + name: + type: string + scene: + type: string + description: + type: string + systemInstruction: + type: string + description: "系统指令内容,支持 {{variable}} 占位符" + variables: + type: array + items: + type: object + properties: + name: + type: string + default: + type: string + description: + type: string + PromptTemplateDetail: + type: object + properties: + id: + type: string + name: + type: string + scene: + type: string + description: + type: string + currentVersion: + type: object + properties: + version: + type: integer + status: + type: string + enum: [draft, published, archived] + systemInstruction: + type: string + variables: + type: array + items: + type: object + versions: + type: array + items: + type: object + properties: + version: + type: integer + status: + type: string + enum: [draft, published, archived] + createdAt: + type: string + format: date-time + IntentRuleInfo: + type: object + properties: + id: + type: string + name: + type: string + description: "意图名称" + keywords: + type: array + items: + type: string + patterns: + type: array + items: + type: string + priority: + type: integer + responseType: + type: string + enum: [flow, rag, fixed, transfer] + targetKbIds: + type: array + items: + type: string + nullable: true + flowId: + type: string + nullable: true + fixedReply: 
+ type: string + nullable: true + transferMessage: + type: string + nullable: true + isEnabled: + type: boolean + hitCount: + type: integer + createdAt: + type: string + format: date-time + updatedAt: + type: string + format: date-time + IntentRuleCreate: + type: object + required: [name, responseType] + properties: + name: + type: string + keywords: + type: array + items: + type: string + patterns: + type: array + items: + type: string + priority: + type: integer + default: 0 + responseType: + type: string + enum: [flow, rag, fixed, transfer] + targetKbIds: + type: array + items: + type: string + flowId: + type: string + fixedReply: + type: string + transferMessage: + type: string + ScriptFlowInfo: + type: object + properties: + id: + type: string + name: + type: string + description: + type: string + stepCount: + type: integer + isEnabled: + type: boolean + linkedRuleCount: + type: integer + description: "关联意图规则数" + createdAt: + type: string + format: date-time + updatedAt: + type: string + format: date-time + ScriptFlowCreate: + type: object + required: [name, steps] + properties: + name: + type: string + description: + type: string + steps: + type: array + items: + $ref: "#/components/schemas/FlowStep" + FlowStep: + type: object + required: [stepNo, content] + properties: + stepNo: + type: integer + content: + type: string + description: "话术内容" + waitInput: + type: boolean + default: true + timeoutSeconds: + type: integer + default: 120 + timeoutAction: + type: string + enum: [repeat, skip, transfer] + default: repeat + nextConditions: + type: array + items: + type: object + properties: + keywords: + type: array + items: + type: string + pattern: + type: string + gotoStep: + type: integer + defaultNext: + type: integer + nullable: true + ForbiddenWordInfo: + type: object + properties: + id: + type: string + word: + type: string + category: + type: string + enum: [competitor, sensitive, political, custom] + strategy: + type: string + enum: [mask, replace, block] 
+ replacement: + type: string + nullable: true + fallbackReply: + type: string + nullable: true + isEnabled: + type: boolean + hitCount: + type: integer + createdAt: + type: string + format: date-time + ForbiddenWordCreate: + type: object + required: [word, category, strategy] + properties: + word: + type: string + category: + type: string + enum: [competitor, sensitive, political, custom] + strategy: + type: string + enum: [mask, replace, block] + replacement: + type: string + fallbackReply: + type: string + BehaviorRuleInfo: + type: object + properties: + id: + type: string + ruleText: + type: string + description: "行为约束描述" + category: + type: string + enum: [compliance, tone, boundary, custom] + isEnabled: + type: boolean + createdAt: + type: string + format: date-time + BehaviorRuleCreate: + type: object + required: [ruleText, category] + properties: + ruleText: + type: string + category: + type: string + enum: [compliance, tone, boundary, custom] + + # v0.7.0 新增 schemas - 测试与监控 + FlowExecutionStep: + type: object + description: "流程执行步骤详情" + properties: + step: + type: integer + description: "步骤编号(1-12)" + name: + type: string + description: "步骤名称" + status: + type: string + enum: [success, failed, skipped] + description: "执行状态" + durationMs: + type: integer + description: "执行耗时(毫秒)" + input: + type: object + description: "步骤输入数据" + output: + type: object + description: "步骤输出数据" + error: + type: string + nullable: true + description: "错误信息(如果失败)" + metadata: + type: object + description: "步骤元数据" + + FlowExecutionResult: + type: object + description: "完整流程执行结果" + properties: + executionId: + type: string + steps: + type: array + items: + $ref: "#/components/schemas/FlowExecutionStep" + totalDurationMs: + type: integer + finalReply: + type: string + confidence: + type: number + format: float + shouldTransfer: + type: boolean + + IntentRuleTestResult: + type: object + description: "意图规则测试结果" + properties: + matched: + type: boolean + description: "是否命中该规则" + 
matchedKeywords: + type: array + items: + type: string + description: "匹配的关键词列表" + matchedPatterns: + type: array + items: + type: string + description: "匹配的正则表达式列表" + priority: + type: integer + description: "规则优先级" + priorityRank: + type: integer + description: "在所有规则中的优先级排名" + conflictRules: + type: array + items: + type: object + properties: + ruleId: + type: string + ruleName: + type: string + priority: + type: integer + description: "同时命中的其他规则列表" + reason: + type: string + nullable: true + description: "未命中原因" + + IntentRuleStats: + type: object + description: "意图规则监控统计" + properties: + ruleId: + type: string + ruleName: + type: string + hitCount: + type: integer + description: "命中次数" + hitRate: + type: number + format: float + description: "命中率" + avgResponseTime: + type: number + format: float + description: "平均响应时间(毫秒)" + lastHitTime: + type: string + format: date-time + nullable: true + responseType: + type: string + enum: [flow, rag, fixed, transfer] + + IntentRuleHitRecord: + type: object + description: "意图规则命中记录" + properties: + conversationId: + type: string + sessionId: + type: string + userMessage: + type: string + matchedKeywords: + type: array + items: + type: string + matchedPatterns: + type: array + items: + type: string + responseType: + type: string + executionResult: + type: string + enum: [success, failed] + hitTime: + type: string + format: date-time + + PromptTemplatePreview: + type: object + description: "Prompt 模板预览结果" + properties: + templateId: + type: string + templateName: + type: string + version: + type: integer + rawContent: + type: string + description: "原始模板内容" + variables: + type: array + items: + type: object + properties: + name: + type: string + value: + type: string + description: "变量列表及当前值" + renderedContent: + type: string + description: "渲染后的完整 Prompt" + estimatedTokens: + type: integer + description: "预估 Token 数量" + + PromptTemplateStats: + type: object + description: "Prompt 模板监控统计" + properties: + templateId: + type: 
string + templateName: + type: string + scene: + type: string + usageCount: + type: integer + avgTokens: + type: number + format: float + avgPromptTokens: + type: number + format: float + avgCompletionTokens: + type: number + format: float + lastUsedTime: + type: string + format: date-time + nullable: true + + ScriptFlowSimulation: + type: object + description: "话术流程模拟执行结果" + properties: + simulationId: + type: string + flowId: + type: string + flowName: + type: string + currentStep: + type: integer + stepContent: + type: string + waitForInput: + type: boolean + nextConditions: + type: array + items: + type: object + properties: + keywords: + type: array + items: + type: string + nextStep: + type: integer + + ScriptFlowStats: + type: object + description: "话术流程监控统计" + properties: + flowId: + type: string + flowName: + type: string + activationCount: + type: integer + completionCount: + type: integer + completionRate: + type: number + format: float + avgCompletionTime: + type: number + format: float + description: "平均完成时长(秒)" + interruptionCount: + type: integer + interruptionRate: + type: number + format: float + + ScriptFlowExecution: + type: object + description: "话术流程执行记录" + properties: + executionId: + type: string + sessionId: + type: string + activationTime: + type: string + format: date-time + currentStep: + type: integer + status: + type: string + enum: [in_progress, completed, interrupted] + interruptionReason: + type: string + nullable: true + completionTime: + type: string + format: date-time + nullable: true + + GuardrailTestResult: + type: object + description: "输出护栏测试结果" + properties: + originalText: + type: string + processedText: + type: string + detectedWords: + type: array + items: + type: object + properties: + word: + type: string + category: + type: string + position: + type: integer + strategy: + type: string + enum: [mask, replace, block] + isBlocked: + type: boolean + blockReason: + type: string + nullable: true + + GuardrailStats: + type: 
object + description: "输出护栏监控统计" + properties: + wordId: + type: string + word: + type: string + category: + type: string + blockCount: + type: integer + replaceCount: + type: integer + maskCount: + type: integer + lastBlockTime: + type: string + format: date-time + nullable: true + + GuardrailBlockRecord: + type: object + description: "禁词拦截记录" + properties: + blockId: + type: string + sessionId: + type: string + originalText: + type: string + processedText: + type: string + strategy: + type: string + enum: [mask, replace, block] + blockTime: + type: string + format: date-time + + ConversationRecord: + type: object + description: "对话追踪记录" + properties: + conversationId: + type: string + sessionId: + type: string + userMessage: + type: string + aiReply: + type: string + triggeredRules: + type: array + items: + type: string + description: "触发的意图规则 ID 列表" + usedTemplate: + type: string + nullable: true + description: "使用的 Prompt 模板 ID" + usedFlow: + type: string + nullable: true + description: "使用的话术流程 ID" + executionTime: + type: integer + description: "执行耗时(毫秒)" + createdAt: + type: string + format: date-time paths: /admin/kb/documents: @@ -383,3 +1120,1442 @@ paths: $ref: "#/components/responses/Unauthorized" '403': $ref: "#/components/responses/Forbidden" + + # ========== v0.6.0 新增 paths ========== + + /admin/kb/knowledge-bases: + get: + summary: "查询知识库列表" + operationId: "listKnowledgeBases" + tags: + - KB Management + x-requirements: ["AC-ASA-29", "AC-AISVC-59"] + parameters: + - $ref: "#/components/parameters/XTenantId" + - name: kbType + in: query + required: false + schema: + type: string + enum: [product, faq, script, policy, general] + description: "知识库类型筛选" + - name: isEnabled + in: query + required: false + schema: + type: boolean + description: "启用状态筛选" + responses: + '200': + description: "知识库列表" + content: + application/json: + schema: + type: object + properties: + data: + type: array + items: + $ref: "#/components/schemas/KnowledgeBaseInfo" + '401': + 
$ref: "#/components/responses/Unauthorized" + '403': + $ref: "#/components/responses/Forbidden" + post: + summary: "创建知识库" + operationId: "createKnowledgeBase" + tags: + - KB Management + x-requirements: ["AC-ASA-30", "AC-AISVC-59"] + parameters: + - $ref: "#/components/parameters/XTenantId" + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/KnowledgeBaseCreate" + responses: + '201': + description: "创建成功" + content: + application/json: + schema: + $ref: "#/components/schemas/KnowledgeBaseInfo" + '401': + $ref: "#/components/responses/Unauthorized" + '403': + $ref: "#/components/responses/Forbidden" + + /admin/kb/knowledge-bases/{kbId}: + put: + summary: "更新知识库" + operationId: "updateKnowledgeBase" + tags: + - KB Management + x-requirements: ["AC-ASA-32", "AC-AISVC-61"] + parameters: + - $ref: "#/components/parameters/XTenantId" + - name: kbId + in: path + required: true + schema: + type: string + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/KnowledgeBaseUpdate" + responses: + '200': + description: "更新成功" + content: + application/json: + schema: + $ref: "#/components/schemas/KnowledgeBaseInfo" + '401': + $ref: "#/components/responses/Unauthorized" + '403': + $ref: "#/components/responses/Forbidden" + delete: + summary: "删除知识库" + operationId: "deleteKnowledgeBase" + tags: + - KB Management + x-requirements: ["AC-ASA-33", "AC-AISVC-62"] + parameters: + - $ref: "#/components/parameters/XTenantId" + - name: kbId + in: path + required: true + schema: + type: string + responses: + '204': + description: "删除成功" + '401': + $ref: "#/components/responses/Unauthorized" + '403': + $ref: "#/components/responses/Forbidden" + + /admin/prompt-templates: + get: + summary: "查询 Prompt 模板列表" + operationId: "listPromptTemplates" + tags: + - Prompt Management + x-requirements: ["AC-ASA-23", "AC-AISVC-57"] + parameters: + - $ref: "#/components/parameters/XTenantId" + - name: scene + 
in: query + required: false + schema: + type: string + description: "场景标签筛选" + responses: + '200': + description: "模板列表" + content: + application/json: + schema: + type: object + properties: + data: + type: array + items: + $ref: "#/components/schemas/PromptTemplateInfo" + '401': + $ref: "#/components/responses/Unauthorized" + '403': + $ref: "#/components/responses/Forbidden" + post: + summary: "创建 Prompt 模板" + operationId: "createPromptTemplate" + tags: + - Prompt Management + x-requirements: ["AC-ASA-24", "AC-AISVC-52"] + parameters: + - $ref: "#/components/parameters/XTenantId" + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/PromptTemplateCreate" + responses: + '201': + description: "创建成功" + content: + application/json: + schema: + $ref: "#/components/schemas/PromptTemplateInfo" + '401': + $ref: "#/components/responses/Unauthorized" + '403': + $ref: "#/components/responses/Forbidden" + + /admin/prompt-templates/{tplId}: + get: + summary: "查询 Prompt 模板详情" + operationId: "getPromptTemplateDetail" + tags: + - Prompt Management + x-requirements: ["AC-ASA-28", "AC-AISVC-58"] + parameters: + - $ref: "#/components/parameters/XTenantId" + - name: tplId + in: path + required: true + schema: + type: string + responses: + '200': + description: "模板详情(含版本历史)" + content: + application/json: + schema: + $ref: "#/components/schemas/PromptTemplateDetail" + '401': + $ref: "#/components/responses/Unauthorized" + '403': + $ref: "#/components/responses/Forbidden" + put: + summary: "更新 Prompt 模板(自动创建新版本)" + operationId: "updatePromptTemplate" + tags: + - Prompt Management + x-requirements: ["AC-ASA-25", "AC-AISVC-53"] + parameters: + - $ref: "#/components/parameters/XTenantId" + - name: tplId + in: path + required: true + schema: + type: string + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/PromptTemplateCreate" + responses: + '200': + description: "更新成功,返回新版本信息" + content: 
+ application/json: + schema: + $ref: "#/components/schemas/PromptTemplateInfo" + '401': + $ref: "#/components/responses/Unauthorized" + '403': + $ref: "#/components/responses/Forbidden" + + /admin/prompt-templates/{tplId}/publish: + post: + summary: "发布 Prompt 模板指定版本" + operationId: "publishPromptTemplateVersion" + tags: + - Prompt Management + x-requirements: ["AC-ASA-26", "AC-AISVC-54"] + parameters: + - $ref: "#/components/parameters/XTenantId" + - name: tplId + in: path + required: true + schema: + type: string + requestBody: + content: + application/json: + schema: + type: object + required: [version] + properties: + version: + type: integer + description: "要发布的版本号" + responses: + '200': + description: "发布成功" + '401': + $ref: "#/components/responses/Unauthorized" + '403': + $ref: "#/components/responses/Forbidden" + + /admin/prompt-templates/{tplId}/rollback: + post: + summary: "回滚 Prompt 模板到指定版本" + operationId: "rollbackPromptTemplate" + tags: + - Prompt Management + x-requirements: ["AC-ASA-27", "AC-AISVC-55"] + parameters: + - $ref: "#/components/parameters/XTenantId" + - name: tplId + in: path + required: true + schema: + type: string + requestBody: + content: + application/json: + schema: + type: object + required: [version] + properties: + version: + type: integer + description: "要回滚到的版本号" + responses: + '200': + description: "回滚成功" + '401': + $ref: "#/components/responses/Unauthorized" + '403': + $ref: "#/components/responses/Forbidden" + + /admin/intent-rules: + get: + summary: "查询意图规则列表" + operationId: "listIntentRules" + tags: + - Intent Rules + x-requirements: ["AC-ASA-34", "AC-AISVC-65"] + parameters: + - $ref: "#/components/parameters/XTenantId" + - name: responseType + in: query + required: false + schema: + type: string + enum: [flow, rag, fixed, transfer] + description: "响应类型筛选" + - name: isEnabled + in: query + required: false + schema: + type: boolean + description: "启用状态筛选" + responses: + '200': + description: "规则列表" + content: + 
application/json: + schema: + type: object + properties: + data: + type: array + items: + $ref: "#/components/schemas/IntentRuleInfo" + '401': + $ref: "#/components/responses/Unauthorized" + '403': + $ref: "#/components/responses/Forbidden" + post: + summary: "创建意图规则" + operationId: "createIntentRule" + tags: + - Intent Rules + x-requirements: ["AC-ASA-35", "AC-AISVC-66"] + parameters: + - $ref: "#/components/parameters/XTenantId" + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/IntentRuleCreate" + responses: + '201': + description: "创建成功" + content: + application/json: + schema: + $ref: "#/components/schemas/IntentRuleInfo" + '401': + $ref: "#/components/responses/Unauthorized" + '403': + $ref: "#/components/responses/Forbidden" + + /admin/intent-rules/{ruleId}: + put: + summary: "更新意图规则" + operationId: "updateIntentRule" + tags: + - Intent Rules + x-requirements: ["AC-ASA-36", "AC-AISVC-67"] + parameters: + - $ref: "#/components/parameters/XTenantId" + - name: ruleId + in: path + required: true + schema: + type: string + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/IntentRuleCreate" + responses: + '200': + description: "更新成功" + content: + application/json: + schema: + $ref: "#/components/schemas/IntentRuleInfo" + '401': + $ref: "#/components/responses/Unauthorized" + '403': + $ref: "#/components/responses/Forbidden" + delete: + summary: "删除意图规则" + operationId: "deleteIntentRule" + tags: + - Intent Rules + x-requirements: ["AC-ASA-36", "AC-AISVC-68"] + parameters: + - $ref: "#/components/parameters/XTenantId" + - name: ruleId + in: path + required: true + schema: + type: string + responses: + '204': + description: "删除成功" + '401': + $ref: "#/components/responses/Unauthorized" + '403': + $ref: "#/components/responses/Forbidden" + + /admin/script-flows: + get: + summary: "查询话术流程列表" + operationId: "listScriptFlows" + tags: + - Script Flows + x-requirements: 
["AC-ASA-37", "AC-AISVC-71"] + parameters: + - $ref: "#/components/parameters/XTenantId" + - name: isEnabled + in: query + required: false + schema: + type: boolean + description: "启用状态筛选" + responses: + '200': + description: "流程列表" + content: + application/json: + schema: + type: object + properties: + data: + type: array + items: + $ref: "#/components/schemas/ScriptFlowInfo" + '401': + $ref: "#/components/responses/Unauthorized" + '403': + $ref: "#/components/responses/Forbidden" + post: + summary: "创建话术流程" + operationId: "createScriptFlow" + tags: + - Script Flows + x-requirements: ["AC-ASA-38", "AC-AISVC-72"] + parameters: + - $ref: "#/components/parameters/XTenantId" + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/ScriptFlowCreate" + responses: + '201': + description: "创建成功" + content: + application/json: + schema: + $ref: "#/components/schemas/ScriptFlowInfo" + '401': + $ref: "#/components/responses/Unauthorized" + '403': + $ref: "#/components/responses/Forbidden" + + /admin/script-flows/{flowId}: + get: + summary: "查询话术流程详情" + operationId: "getScriptFlowDetail" + tags: + - Script Flows + x-requirements: ["AC-ASA-39", "AC-AISVC-73"] + parameters: + - $ref: "#/components/parameters/XTenantId" + - name: flowId + in: path + required: true + schema: + type: string + responses: + '200': + description: "流程详情(含完整步骤定义)" + content: + application/json: + schema: + type: object + properties: + id: + type: string + name: + type: string + description: + type: string + isEnabled: + type: boolean + steps: + type: array + items: + $ref: "#/components/schemas/FlowStep" + linkedRuleCount: + type: integer + createdAt: + type: string + format: date-time + updatedAt: + type: string + format: date-time + '401': + $ref: "#/components/responses/Unauthorized" + '403': + $ref: "#/components/responses/Forbidden" + put: + summary: "更新话术流程" + operationId: "updateScriptFlow" + tags: + - Script Flows + x-requirements: ["AC-ASA-38", 
"AC-AISVC-72"] + parameters: + - $ref: "#/components/parameters/XTenantId" + - name: flowId + in: path + required: true + schema: + type: string + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/ScriptFlowCreate" + responses: + '200': + description: "更新成功" + content: + application/json: + schema: + $ref: "#/components/schemas/ScriptFlowInfo" + '401': + $ref: "#/components/responses/Unauthorized" + '403': + $ref: "#/components/responses/Forbidden" + + /admin/guardrails/forbidden-words: + get: + summary: "查询禁词列表" + operationId: "listForbiddenWords" + tags: + - Guardrails + x-requirements: ["AC-ASA-41", "AC-AISVC-78"] + parameters: + - $ref: "#/components/parameters/XTenantId" + - name: category + in: query + required: false + schema: + type: string + enum: [competitor, sensitive, political, custom] + description: "类别筛选" + - name: isEnabled + in: query + required: false + schema: + type: boolean + description: "启用状态筛选" + responses: + '200': + description: "禁词列表" + content: + application/json: + schema: + type: object + properties: + data: + type: array + items: + $ref: "#/components/schemas/ForbiddenWordInfo" + '401': + $ref: "#/components/responses/Unauthorized" + '403': + $ref: "#/components/responses/Forbidden" + post: + summary: "添加禁词" + operationId: "createForbiddenWord" + tags: + - Guardrails + x-requirements: ["AC-ASA-42", "AC-AISVC-79"] + parameters: + - $ref: "#/components/parameters/XTenantId" + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/ForbiddenWordCreate" + responses: + '201': + description: "添加成功" + content: + application/json: + schema: + $ref: "#/components/schemas/ForbiddenWordInfo" + '401': + $ref: "#/components/responses/Unauthorized" + '403': + $ref: "#/components/responses/Forbidden" + + /admin/guardrails/forbidden-words/{wordId}: + put: + summary: "更新禁词" + operationId: "updateForbiddenWord" + tags: + - Guardrails + x-requirements: 
["AC-ASA-42", "AC-AISVC-80"] + parameters: + - $ref: "#/components/parameters/XTenantId" + - name: wordId + in: path + required: true + schema: + type: string + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/ForbiddenWordCreate" + responses: + '200': + description: "更新成功" + content: + application/json: + schema: + $ref: "#/components/schemas/ForbiddenWordInfo" + '401': + $ref: "#/components/responses/Unauthorized" + '403': + $ref: "#/components/responses/Forbidden" + delete: + summary: "删除禁词" + operationId: "deleteForbiddenWord" + tags: + - Guardrails + x-requirements: ["AC-ASA-42", "AC-AISVC-81"] + parameters: + - $ref: "#/components/parameters/XTenantId" + - name: wordId + in: path + required: true + schema: + type: string + responses: + '204': + description: "删除成功" + '401': + $ref: "#/components/responses/Unauthorized" + '403': + $ref: "#/components/responses/Forbidden" + + /admin/guardrails/behavior-rules: + get: + summary: "查询行为规则列表" + operationId: "listBehaviorRules" + tags: + - Guardrails + x-requirements: ["AC-ASA-43", "AC-AISVC-84"] + parameters: + - $ref: "#/components/parameters/XTenantId" + - name: category + in: query + required: false + schema: + type: string + enum: [compliance, tone, boundary, custom] + description: "类别筛选" + responses: + '200': + description: "行为规则列表" + content: + application/json: + schema: + type: object + properties: + data: + type: array + items: + $ref: "#/components/schemas/BehaviorRuleInfo" + '401': + $ref: "#/components/responses/Unauthorized" + '403': + $ref: "#/components/responses/Forbidden" + post: + summary: "添加行为规则" + operationId: "createBehaviorRule" + tags: + - Guardrails + x-requirements: ["AC-ASA-44", "AC-AISVC-85"] + parameters: + - $ref: "#/components/parameters/XTenantId" + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/BehaviorRuleCreate" + responses: + '201': + description: "添加成功" + content: + 
application/json: + schema: + $ref: "#/components/schemas/BehaviorRuleInfo" + '401': + $ref: "#/components/responses/Unauthorized" + '403': + $ref: "#/components/responses/Forbidden" + + /admin/guardrails/behavior-rules/{ruleId}: + put: + summary: "更新行为规则" + operationId: "updateBehaviorRule" + tags: + - Guardrails + x-requirements: ["AC-ASA-43", "AC-AISVC-85"] + parameters: + - $ref: "#/components/parameters/XTenantId" + - name: ruleId + in: path + required: true + schema: + type: string + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/BehaviorRuleCreate" + responses: + '200': + description: "更新成功" + content: + application/json: + schema: + $ref: "#/components/schemas/BehaviorRuleInfo" + '401': + $ref: "#/components/responses/Unauthorized" + '403': + $ref: "#/components/responses/Forbidden" + delete: + summary: "删除行为规则" + operationId: "deleteBehaviorRule" + tags: + - Guardrails + x-requirements: ["AC-ASA-43", "AC-AISVC-85"] + parameters: + - $ref: "#/components/parameters/XTenantId" + - name: ruleId + in: path + required: true + schema: + type: string + responses: + '204': + description: "删除成功" + '401': + $ref: "#/components/responses/Unauthorized" + '403': + $ref: "#/components/responses/Forbidden" + + # v0.7.0 新增 paths - 测试与监控 + /admin/test/flow-execution: + post: + summary: "完整流程测试" + operationId: "testFlowExecution" + tags: + - Testing + x-requirements: ["AC-ASA-49", "AC-AISVC-93", "AC-AISVC-94", "AC-AISVC-95"] + parameters: + - $ref: "#/components/parameters/XTenantId" + requestBody: + required: true + content: + application/json: + schema: + type: object + required: [message] + properties: + message: + type: string + description: "测试消息" + sessionId: + type: string + description: "会话 ID(可选)" + compareConfigs: + type: array + description: "对比配置列表" + items: + type: object + properties: + templateId: + type: string + kbIds: + type: array + items: + type: string + retrievalParams: + type: object + responses: + 
'200': + description: "流程执行结果" + content: + application/json: + schema: + oneOf: + - $ref: "#/components/schemas/FlowExecutionResult" + - type: object + properties: + comparisons: + type: array + items: + $ref: "#/components/schemas/FlowExecutionResult" + '401': + $ref: "#/components/responses/Unauthorized" + '403': + $ref: "#/components/responses/Forbidden" + + /admin/intent-rules/{ruleId}/test: + post: + summary: "测试意图规则" + operationId: "testIntentRule" + tags: + - Testing + x-requirements: ["AC-ASA-53", "AC-AISVC-96"] + parameters: + - $ref: "#/components/parameters/XTenantId" + - name: ruleId + in: path + required: true + schema: + type: string + requestBody: + required: true + content: + application/json: + schema: + type: object + required: [message] + properties: + message: + type: string + description: "测试消息" + responses: + '200': + description: "测试结果" + content: + application/json: + schema: + $ref: "#/components/schemas/IntentRuleTestResult" + '401': + $ref: "#/components/responses/Unauthorized" + '403': + $ref: "#/components/responses/Forbidden" + /admin/monitoring/intent-rules: + get: + summary: "查询意图规则监控统计" + operationId: "getIntentRuleStats" + tags: + - Monitoring + x-requirements: ["AC-ASA-54", "AC-AISVC-97"] + parameters: + - $ref: "#/components/parameters/XTenantId" + - name: startTime + in: query + schema: + type: string + format: date-time + - name: endTime + in: query + schema: + type: string + format: date-time + - name: responseType + in: query + schema: + type: string + enum: [flow, rag, fixed, transfer] + responses: + '200': + description: "规则统计列表" + content: + application/json: + schema: + type: object + properties: + data: + type: array + items: + $ref: "#/components/schemas/IntentRuleStats" + '401': + $ref: "#/components/responses/Unauthorized" + '403': + $ref: "#/components/responses/Forbidden" + + /admin/monitoring/intent-rules/{ruleId}/hits: + get: + summary: "查询意图规则命中记录" + operationId: "getIntentRuleHits" + tags: + - Monitoring + 
x-requirements: ["AC-ASA-55", "AC-AISVC-98"] + parameters: + - $ref: "#/components/parameters/XTenantId" + - name: ruleId + in: path + required: true + schema: + type: string + - name: page + in: query + schema: + type: integer + default: 1 + - name: pageSize + in: query + schema: + type: integer + default: 20 + responses: + '200': + description: "命中记录列表" + content: + application/json: + schema: + type: object + properties: + data: + type: array + items: + $ref: "#/components/schemas/IntentRuleHitRecord" + pagination: + $ref: "#/components/schemas/PageInfo" + '401': + $ref: "#/components/responses/Unauthorized" + '403': + $ref: "#/components/responses/Forbidden" + + /admin/prompt-templates/{tplId}/preview: + post: + summary: "预览 Prompt 模板" + operationId: "previewPromptTemplate" + tags: + - Testing + x-requirements: ["AC-ASA-56", "AC-AISVC-99"] + parameters: + - $ref: "#/components/parameters/XTenantId" + - name: tplId + in: path + required: true + schema: + type: string + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + variables: + type: object + description: "变量测试值(key-value 对)" + additionalProperties: + type: string + responses: + '200': + description: "预览结果" + content: + application/json: + schema: + $ref: "#/components/schemas/PromptTemplatePreview" + '401': + $ref: "#/components/responses/Unauthorized" + '403': + $ref: "#/components/responses/Forbidden" + + /admin/monitoring/prompt-templates: + get: + summary: "查询 Prompt 模板监控统计" + operationId: "getPromptTemplateStats" + tags: + - Monitoring + x-requirements: ["AC-ASA-58", "AC-AISVC-100"] + parameters: + - $ref: "#/components/parameters/XTenantId" + - name: scene + in: query + schema: + type: string + description: "场景标签筛选" + - name: startTime + in: query + schema: + type: string + format: date-time + - name: endTime + in: query + schema: + type: string + format: date-time + responses: + '200': + description: "模板统计列表" + content: + application/json: + schema: 
+ type: object + properties: + data: + type: array + items: + $ref: "#/components/schemas/PromptTemplateStats" + '401': + $ref: "#/components/responses/Unauthorized" + '403': + $ref: "#/components/responses/Forbidden" + + /admin/script-flows/{flowId}/simulate: + post: + summary: "模拟执行话术流程" + operationId: "simulateScriptFlow" + tags: + - Testing + x-requirements: ["AC-ASA-59", "AC-AISVC-101"] + parameters: + - $ref: "#/components/parameters/XTenantId" + - name: flowId + in: path + required: true + schema: + type: string + responses: + '200': + description: "模拟执行结果" + content: + application/json: + schema: + $ref: "#/components/schemas/ScriptFlowSimulation" + '401': + $ref: "#/components/responses/Unauthorized" + '403': + $ref: "#/components/responses/Forbidden" + + /admin/script-flows/{flowId}/simulate/{simulationId}/next: + post: + summary: "推进话术流程模拟" + operationId: "simulateScriptFlowNext" + tags: + - Testing + x-requirements: ["AC-ASA-59", "AC-AISVC-102"] + parameters: + - $ref: "#/components/parameters/XTenantId" + - name: flowId + in: path + required: true + schema: + type: string + - name: simulationId + in: path + required: true + schema: + type: string + requestBody: + required: true + content: + application/json: + schema: + type: object + required: [userInput] + properties: + userInput: + type: string + description: "用户模拟输入" + responses: + '200': + description: "下一步结果" + content: + application/json: + schema: + $ref: "#/components/schemas/ScriptFlowSimulation" + '401': + $ref: "#/components/responses/Unauthorized" + '403': + $ref: "#/components/responses/Forbidden" + + /admin/monitoring/script-flows: + get: + summary: "查询话术流程监控统计" + operationId: "getScriptFlowStats" + tags: + - Monitoring + x-requirements: ["AC-ASA-60", "AC-AISVC-103"] + parameters: + - $ref: "#/components/parameters/XTenantId" + - name: startTime + in: query + schema: + type: string + format: date-time + - name: endTime + in: query + schema: + type: string + format: date-time + responses: 
+ '200': + description: "流程统计列表" + content: + application/json: + schema: + type: object + properties: + data: + type: array + items: + $ref: "#/components/schemas/ScriptFlowStats" + '401': + $ref: "#/components/responses/Unauthorized" + '403': + $ref: "#/components/responses/Forbidden" + + /admin/monitoring/script-flows/{flowId}/executions: + get: + summary: "查询话术流程执行记录" + operationId: "getScriptFlowExecutions" + tags: + - Monitoring + x-requirements: ["AC-ASA-61", "AC-AISVC-104"] + parameters: + - $ref: "#/components/parameters/XTenantId" + - name: flowId + in: path + required: true + schema: + type: string + - name: status + in: query + schema: + type: string + enum: [in_progress, completed, interrupted] + - name: page + in: query + schema: + type: integer + default: 1 + - name: pageSize + in: query + schema: + type: integer + default: 20 + responses: + '200': + description: "执行记录列表" + content: + application/json: + schema: + type: object + properties: + data: + type: array + items: + $ref: "#/components/schemas/ScriptFlowExecution" + pagination: + $ref: "#/components/schemas/PageInfo" + '401': + $ref: "#/components/responses/Unauthorized" + '403': + $ref: "#/components/responses/Forbidden" + + /admin/guardrails/test: + post: + summary: "测试输出护栏" + operationId: "testGuardrail" + tags: + - Testing + x-requirements: ["AC-ASA-62", "AC-AISVC-105"] + parameters: + - $ref: "#/components/parameters/XTenantId" + requestBody: + required: true + content: + application/json: + schema: + type: object + required: [text] + properties: + text: + type: string + description: "测试文本" + responses: + '200': + description: "测试结果" + content: + application/json: + schema: + $ref: "#/components/schemas/GuardrailTestResult" + '401': + $ref: "#/components/responses/Unauthorized" + '403': + $ref: "#/components/responses/Forbidden" + + /admin/monitoring/guardrails: + get: + summary: "查询输出护栏监控统计" + operationId: "getGuardrailStats" + tags: + - Monitoring + x-requirements: ["AC-ASA-63", 
"AC-AISVC-106"] + parameters: + - $ref: "#/components/parameters/XTenantId" + - name: category + in: query + schema: + type: string + enum: [competitor, sensitive, political, custom] + - name: startTime + in: query + schema: + type: string + format: date-time + - name: endTime + in: query + schema: + type: string + format: date-time + responses: + '200': + description: "护栏统计列表" + content: + application/json: + schema: + type: object + properties: + data: + type: array + items: + $ref: "#/components/schemas/GuardrailStats" + '401': + $ref: "#/components/responses/Unauthorized" + '403': + $ref: "#/components/responses/Forbidden" + + /admin/monitoring/guardrails/{wordId}/blocks: + get: + summary: "查询禁词拦截记录" + operationId: "getGuardrailBlocks" + tags: + - Monitoring + x-requirements: ["AC-ASA-64", "AC-AISVC-107"] + parameters: + - $ref: "#/components/parameters/XTenantId" + - name: wordId + in: path + required: true + schema: + type: string + - name: page + in: query + schema: + type: integer + default: 1 + - name: pageSize + in: query + schema: + type: integer + default: 20 + responses: + '200': + description: "拦截记录列表" + content: + application/json: + schema: + type: object + properties: + data: + type: array + items: + $ref: "#/components/schemas/GuardrailBlockRecord" + pagination: + $ref: "#/components/schemas/PageInfo" + '401': + $ref: "#/components/responses/Unauthorized" + '403': + $ref: "#/components/responses/Forbidden" + + /admin/monitoring/conversations: + get: + summary: "查询对话追踪列表" + operationId: "getConversations" + tags: + - Monitoring + x-requirements: ["AC-ASA-65", "AC-AISVC-108"] + parameters: + - $ref: "#/components/parameters/XTenantId" + - name: startTime + in: query + schema: + type: string + format: date-time + - name: endTime + in: query + schema: + type: string + format: date-time + - name: ruleId + in: query + schema: + type: string + description: "意图规则 ID 筛选" + - name: templateId + in: query + schema: + type: string + description: "Prompt 模板 ID 
筛选" + - name: flowId + in: query + schema: + type: string + description: "话术流程 ID 筛选" + - name: page + in: query + schema: + type: integer + default: 1 + - name: pageSize + in: query + schema: + type: integer + default: 20 + responses: + '200': + description: "对话记录列表" + content: + application/json: + schema: + type: object + properties: + data: + type: array + items: + $ref: "#/components/schemas/ConversationRecord" + pagination: + $ref: "#/components/schemas/PageInfo" + '401': + $ref: "#/components/responses/Unauthorized" + '403': + $ref: "#/components/responses/Forbidden" + + /admin/monitoring/conversations/{conversationId}: + get: + summary: "查询对话执行链路详情" + operationId: "getConversationDetail" + tags: + - Monitoring + x-requirements: ["AC-ASA-66", "AC-AISVC-109"] + parameters: + - $ref: "#/components/parameters/XTenantId" + - name: conversationId + in: path + required: true + schema: + type: string + responses: + '200': + description: "对话执行链路详情" + content: + application/json: + schema: + $ref: "#/components/schemas/FlowExecutionResult" + '401': + $ref: "#/components/responses/Unauthorized" + '403': + $ref: "#/components/responses/Forbidden" + + /admin/monitoring/conversations/export: + post: + summary: "导出对话记录" + operationId: "exportConversations" + tags: + - Monitoring + x-requirements: ["AC-ASA-68", "AC-AISVC-110"] + parameters: + - $ref: "#/components/parameters/XTenantId" + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + format: + type: string + enum: [json, csv] + default: json + startTime: + type: string + format: date-time + endTime: + type: string + format: date-time + filters: + type: object + properties: + ruleId: + type: string + templateId: + type: string + flowId: + type: string + responses: + '200': + description: "导出文件" + content: + application/json: + schema: + type: object + properties: + downloadUrl: + type: string + description: "下载链接" + text/csv: + schema: + type: string + format: binary + 
'401': + $ref: "#/components/responses/Unauthorized" + '403': + $ref: "#/components/responses/Forbidden" diff --git a/spec/ai-service/requirements.md b/spec/ai-service/requirements.md index f82d61f..b10f9b1 100644 --- a/spec/ai-service/requirements.md +++ b/spec/ai-service/requirements.md @@ -1,12 +1,12 @@ --- feature_id: "AISVC" title: "Python AI 中台(ai-service)需求规范" -status: "completed" -version: "0.4.0" +status: "in-progress" +version: "0.6.0" owners: - "product" - "backend" -last_updated: "2026-02-24" +last_updated: "2026-02-25" source: type: "conversation" ref: "" @@ -306,3 +306,344 @@ source: | AC-AISVC-48 | /admin/rag/experiments/stream | POST | runRagExperimentStream | RAG 实验流式输出 | | AC-AISVC-49 | /admin/rag/experiments/run | POST | runRagExperiment | Token 统计 | | AC-AISVC-50 | /admin/rag/experiments/run | POST | runRagExperiment | 指定 LLM 提供者 | + +--- + +## 12. 迭代需求:智能客服增强 — Prompt 模板化 + 多知识库 + 规则引擎 + 输出护栏(v0.6.0) + +> 说明:本节为 v0.6.0 迭代新增。目标是将 AI 中台从"单知识库 + 硬编码 Prompt"升级为可配置的拟人客服中台,支持: +> - Prompt 模板数据库驱动(按租户配置人设、语气、禁忌词) +> - 多知识库分类管理与智能路由 +> - 意图识别 + 话术流程引擎(固定步骤引导) +> - 输出护栏(禁词检测、敏感内容过滤) +> - 智能 RAG 增强(Query 改写、上下文感知检索、分层知识优先级) + +### 12.1 Prompt 模板化(数据库驱动) + +- [AC-AISVC-51] WHEN 系统启动或租户首次请求 THEN 系统 SHALL 从数据库加载该租户的 Prompt 模板配置(人设名称、语气风格、角色描述、系统指令),替代硬编码的 `SYSTEM_PROMPT`。 + +- [AC-AISVC-52] WHEN 管理员通过 `POST /admin/prompt-templates` 创建 Prompt 模板 THEN 系统 SHALL 存储模板内容(含模板名称、场景标签、系统指令、变量占位符列表),并返回模板 ID。 + +- [AC-AISVC-53] WHEN 管理员通过 `PUT /admin/prompt-templates/{tplId}` 更新模板 THEN 系统 SHALL 创建新版本(版本号自增),保留历史版本,且同一时间仅一个版本为"已发布"状态。 + +- [AC-AISVC-54] WHEN 管理员通过 `POST /admin/prompt-templates/{tplId}/publish` 发布指定版本 THEN 系统 SHALL 将该版本标记为"已发布",旧版本自动降级为"历史版本",发布后立即生效(热更新)。 + +- [AC-AISVC-55] WHEN 管理员通过 `POST /admin/prompt-templates/{tplId}/rollback` 回滚 THEN 系统 SHALL 将指定历史版本重新标记为"已发布"。 + +- [AC-AISVC-56] WHEN Orchestrator 构建 Prompt 时 THEN 系统 SHALL 从已发布的模板中读取系统指令,并替换内置变量(如 `{{persona_name}}`、`{{current_time}}`、`{{channel_type}}`),最终拼接为 LLM 的 system message。 + +- 
[AC-AISVC-57] WHEN 管理员通过 `GET /admin/prompt-templates` 查询模板列表 THEN 系统 SHALL 返回该租户下所有模板(含场景标签、当前发布版本号、更新时间),支持按场景筛选。 + +- [AC-AISVC-58] WHEN 管理员通过 `GET /admin/prompt-templates/{tplId}` 查询模板详情 THEN 系统 SHALL 返回模板所有版本列表及当前发布版本的完整内容。 + +### 12.2 多知识库分类管理 + +- [AC-AISVC-59] WHEN 管理员通过 `POST /admin/kb/knowledge-bases` 创建知识库 THEN 系统 SHALL 创建独立的知识库实体,包含名称、类型(`product` 产品知识 / `faq` 常见问题 / `script` 话术模板 / `policy` 政策规范 / `general` 通用)、描述、优先级权重,并在 Qdrant 中初始化对应的 Collection。 + +- [AC-AISVC-60] WHEN 管理员通过 `GET /admin/kb/knowledge-bases` 查询知识库列表 THEN 系统 SHALL 返回该租户下所有知识库(含文档数量、索引状态统计、类型、优先级)。 + +- [AC-AISVC-61] WHEN 管理员通过 `PUT /admin/kb/knowledge-bases/{kbId}` 更新知识库 THEN 系统 SHALL 支持修改名称、描述、类型、优先级权重、启用/禁用状态。 + +- [AC-AISVC-62] WHEN 管理员通过 `DELETE /admin/kb/knowledge-bases/{kbId}` 删除知识库 THEN 系统 SHALL 删除知识库实体、关联文档记录,并清理 Qdrant 中对应的 Collection 数据。 + +- [AC-AISVC-63] WHEN 文档上传时指定 `kbId` THEN 系统 SHALL 将文档索引到指定知识库对应的 Qdrant Collection(替代原有的 `kb_default` 硬编码)。 + +- [AC-AISVC-64] WHEN Orchestrator 执行 RAG 检索 THEN 系统 SHALL 根据意图路由结果选择目标知识库集合进行检索,而非全库搜索。若未命中意图规则,则按知识库优先级权重依次检索。 + +### 12.3 意图识别与规则引擎 + +- [AC-AISVC-65] WHEN 管理员通过 `POST /admin/intent-rules` 创建意图规则 THEN 系统 SHALL 存储规则(含意图名称、关键词列表、正则模式列表、优先级、响应类型 `flow` / `rag` / `fixed` / `transfer`、关联的知识库 ID 列表或话术流程 ID 或固定回复内容)。 + +- [AC-AISVC-66] WHEN 管理员通过 `GET /admin/intent-rules` 查询意图规则列表 THEN 系统 SHALL 返回该租户下所有规则(含命中统计、启用状态),支持按意图名称和响应类型筛选。 + +- [AC-AISVC-67] WHEN 管理员通过 `PUT /admin/intent-rules/{ruleId}` 更新规则 THEN 系统 SHALL 支持修改关键词、正则、优先级、响应类型、关联资源,更新后立即生效。 + +- [AC-AISVC-68] WHEN 管理员通过 `DELETE /admin/intent-rules/{ruleId}` 删除规则 THEN 系统 SHALL 删除规则并立即生效。 + +- [AC-AISVC-69] WHEN 用户消息进入 Orchestrator THEN 系统 SHALL 在 RAG 检索之前执行意图识别:按优先级遍历规则,依次进行关键词匹配和正则匹配。命中规则后根据 `response_type` 路由: + - `fixed`:直接返回固定回复,跳过 LLM 调用。 + - `flow`:进入话术流程引擎,按步骤推进。 + - `rag`:使用规则关联的知识库集合进行定向检索。 + - `transfer`:直接设置 `shouldTransfer=true` 并返回转人工话术。 + +- [AC-AISVC-70] WHEN 所有规则均未命中 THEN 系统 SHALL 回退到默认 RAG pipeline(按知识库优先级检索),行为与现有逻辑一致。 + +### 12.4 话术流程引擎(状态机) + +- 
[AC-AISVC-71] WHEN 管理员通过 `POST /admin/script-flows` 创建话术流程 THEN 系统 SHALL 存储流程定义(含流程名称、触发条件描述、步骤列表),每个步骤包含:步骤序号、话术内容(支持变量占位符)、等待用户输入标志、超时秒数、超时后动作(重复/跳过/转人工)、下一步条件(关键词匹配或无条件推进)。 + +- [AC-AISVC-72] WHEN 管理员通过 `GET /admin/script-flows` 查询流程列表 THEN 系统 SHALL 返回该租户下所有流程(含步骤数、启用状态、关联意图规则数)。 + +- [AC-AISVC-73] WHEN 管理员通过 `PUT /admin/script-flows/{flowId}` 更新流程 THEN 系统 SHALL 支持修改流程步骤、触发条件、启用/禁用状态。 + +- [AC-AISVC-74] WHEN 意图识别命中 `flow` 类型规则 THEN 系统 SHALL 为当前会话创建流程执行实例(关联 `session_id` + `flow_id`),从第一步开始执行,返回第一步的话术内容。 + +- [AC-AISVC-75] WHEN 会话存在进行中的流程实例 THEN 系统 SHALL 优先处理流程逻辑:根据用户输入匹配当前步骤的下一步条件,推进到下一步骤并返回对应话术。若用户输入不匹配任何条件,则重复当前步骤话术或按配置处理。 + +- [AC-AISVC-76] WHEN 流程执行到最后一步或用户触发退出条件 THEN 系统 SHALL 标记流程实例为"已完成",后续消息恢复正常 RAG pipeline 处理。 + +- [AC-AISVC-77] WHEN 流程步骤超时(用户在指定时间内未回复) THEN 系统 SHALL 按步骤配置执行超时动作(重复当前话术 / 跳到下一步 / 转人工)。注:超时检测由调用方(Java 侧)触发,AI 中台提供查询流程状态的接口。 + +### 12.5 输出护栏(禁词检测与内容过滤) + +- [AC-AISVC-78] WHEN 管理员通过 `POST /admin/guardrails/forbidden-words` 添加禁词 THEN 系统 SHALL 存储禁词(含词语、类别 `competitor` / `sensitive` / `political` / `custom`、替换策略 `mask` 星号替换 / `replace` 替换为指定文本 / `block` 拦截整条回复并返回兜底话术)。 + +- [AC-AISVC-79] WHEN 管理员通过 `GET /admin/guardrails/forbidden-words` 查询禁词列表 THEN 系统 SHALL 返回该租户下所有禁词(支持按类别筛选),含命中统计。 + +- [AC-AISVC-80] WHEN 管理员通过 `PUT /admin/guardrails/forbidden-words/{wordId}` 更新禁词 THEN 系统 SHALL 支持修改词语、类别、替换策略、启用/禁用状态。 + +- [AC-AISVC-81] WHEN 管理员通过 `DELETE /admin/guardrails/forbidden-words/{wordId}` 删除禁词 THEN 系统 SHALL 删除禁词并立即生效。 + +- [AC-AISVC-82] WHEN LLM 生成回复后(non-streaming 和 streaming 均适用) THEN 系统 SHALL 执行后置过滤:扫描回复内容中的禁词,按替换策略处理: + - `mask`:将禁词替换为等长星号(如"竞品A" → "***")。 + - `replace`:将禁词替换为配置的替换文本。 + - `block`:丢弃整条回复,返回预配置的兜底话术(如"抱歉,让我换个方式回答您"),并在 metadata 中标记 `guardrail_triggered=true`。 + +- [AC-AISVC-83] WHEN 用户输入包含禁词(前置检测) THEN 系统 SHALL 在 metadata 中记录 `input_flagged=true` 及命中的禁词类别,但不阻断请求处理(仅记录,不拦截用户输入)。 + +- [AC-AISVC-84] WHEN 管理员通过 `POST /admin/guardrails/behavior-rules` 添加行为规则 THEN 系统 SHALL 存储行为约束(如"不允许承诺具体赔偿金额"、"不允许透露内部流程"),这些规则将被注入到 Prompt 模板的系统指令中作为 
LLM 的行为约束。 + +- [AC-AISVC-85] WHEN 管理员通过 `GET /admin/guardrails/behavior-rules` 查询行为规则列表 THEN 系统 SHALL 返回该租户下所有行为规则。 + +### 12.6 智能 RAG 增强 + +- [AC-AISVC-86] WHEN Orchestrator 执行 RAG 检索前 THEN 系统 SHALL 对用户 Query 进行改写增强:结合对话历史解析指代词(如"它"、"这个"指代上文提到的产品),补全查询语义,生成优化后的检索 Query。Query 改写通过 LLM 调用实现(使用简短的改写 Prompt)。 + +- [AC-AISVC-87] WHEN 多知识库检索返回结果后 THEN 系统 SHALL 按知识库优先级对结果进行分层排序:`script`(话术模板)> `faq`(常见问题)> `product`(产品知识)> `policy`(政策规范)> `general`(通用),同层内按相似度分数排序。高优先级知识库的命中结果优先进入 LLM 上下文。 + +- [AC-AISVC-88] WHEN 检索结果中存在 `script` 类型知识库的高分命中(score > 配置阈值) THEN 系统 SHALL 优先使用话术模板内容作为回复参考,引导 LLM 生成贴近标准话术的回复。 + +### 12.7 数据预处理扩展点(架构预留) + +- [AC-AISVC-89] WHEN 系统设计文档解析服务时 THEN 系统 SHALL 在 `DocumentParser` 抽象接口中预留 `AudioParser`(语音转文字)和 `VideoParser`(视频提取音轨转文字)的扩展点,但 v0.6.0 不实现具体解析逻辑。 + +- [AC-AISVC-90] WHEN 系统设计知识库索引服务时 THEN 系统 SHALL 在 `KnowledgeIndexer` 中预留"对话记录结构化导入"接口(接受结构化 JSON 格式的对话记录,按轮次分块索引),但 v0.6.0 不实现具体导入逻辑。 + +### 12.8 追踪映射(v0.6.0 迭代) + +| AC ID | Endpoint | 方法 | Operation | 描述 | +|-------|----------|------|-----------|------| +| AC-AISVC-51 | - | - | - | Prompt 模板数据库驱动加载 | +| AC-AISVC-52 | /admin/prompt-templates | POST | createPromptTemplate | 创建 Prompt 模板 | +| AC-AISVC-53 | /admin/prompt-templates/{tplId} | PUT | updatePromptTemplate | 更新模板(版本化) | +| AC-AISVC-54 | /admin/prompt-templates/{tplId}/publish | POST | publishPromptTemplate | 发布模板版本 | +| AC-AISVC-55 | /admin/prompt-templates/{tplId}/rollback | POST | rollbackPromptTemplate | 回滚模板版本 | +| AC-AISVC-56 | - | - | - | Prompt 变量替换与拼接 | +| AC-AISVC-57 | /admin/prompt-templates | GET | listPromptTemplates | 模板列表查询 | +| AC-AISVC-58 | /admin/prompt-templates/{tplId} | GET | getPromptTemplate | 模板详情查询 | +| AC-AISVC-59 | /admin/kb/knowledge-bases | POST | createKnowledgeBase | 创建知识库 | +| AC-AISVC-60 | /admin/kb/knowledge-bases | GET | listKnowledgeBases | 知识库列表 | +| AC-AISVC-61 | /admin/kb/knowledge-bases/{kbId} | PUT | updateKnowledgeBase | 更新知识库 | +| AC-AISVC-62 | /admin/kb/knowledge-bases/{kbId} | DELETE | 
deleteKnowledgeBase | 删除知识库 | +| AC-AISVC-63 | /admin/kb/documents | POST | uploadDocument | 文档指定知识库上传 | +| AC-AISVC-64 | - | - | - | 多知识库智能路由检索 | +| AC-AISVC-65 | /admin/intent-rules | POST | createIntentRule | 创建意图规则 | +| AC-AISVC-66 | /admin/intent-rules | GET | listIntentRules | 意图规则列表 | +| AC-AISVC-67 | /admin/intent-rules/{ruleId} | PUT | updateIntentRule | 更新意图规则 | +| AC-AISVC-68 | /admin/intent-rules/{ruleId} | DELETE | deleteIntentRule | 删除意图规则 | +| AC-AISVC-69 | - | - | - | 意图识别与路由执行 | +| AC-AISVC-70 | - | - | - | 未命中规则回退默认 RAG | +| AC-AISVC-71 | /admin/script-flows | POST | createScriptFlow | 创建话术流程 | +| AC-AISVC-72 | /admin/script-flows | GET | listScriptFlows | 话术流程列表 | +| AC-AISVC-73 | /admin/script-flows/{flowId} | PUT | updateScriptFlow | 更新话术流程 | +| AC-AISVC-74 | - | - | - | 流程实例创建与首步执行 | +| AC-AISVC-75 | - | - | - | 流程步骤推进 | +| AC-AISVC-76 | - | - | - | 流程完成与恢复 | +| AC-AISVC-77 | - | - | - | 流程超时处理 | +| AC-AISVC-78 | /admin/guardrails/forbidden-words | POST | addForbiddenWord | 添加禁词 | +| AC-AISVC-79 | /admin/guardrails/forbidden-words | GET | listForbiddenWords | 禁词列表 | +| AC-AISVC-80 | /admin/guardrails/forbidden-words/{wordId} | PUT | updateForbiddenWord | 更新禁词 | +| AC-AISVC-81 | /admin/guardrails/forbidden-words/{wordId} | DELETE | deleteForbiddenWord | 删除禁词 | +| AC-AISVC-82 | - | - | - | LLM 输出后置禁词过滤 | +| AC-AISVC-83 | - | - | - | 用户输入前置禁词检测 | +| AC-AISVC-84 | /admin/guardrails/behavior-rules | POST | addBehaviorRule | 添加行为规则 | +| AC-AISVC-85 | /admin/guardrails/behavior-rules | GET | listBehaviorRules | 行为规则列表 | +| AC-AISVC-86 | - | - | - | Query 改写增强 | +| AC-AISVC-87 | - | - | - | 多知识库分层排序 | +| AC-AISVC-88 | - | - | - | 话术模板优先匹配 | +| AC-AISVC-89 | - | - | - | 音视频解析扩展点预留 | +| AC-AISVC-90 | - | - | - | 对话记录导入扩展点预留 | + +--- + +## 13. 
迭代需求:对话流程测试与监控 API(v0.7.0) + +> 说明:本节为 v0.7.0 迭代新增,为前端测试与监控功能提供后端 API 支持。 + +### 13.1 Dashboard 统计增强 API + +- [AC-AISVC-91] WHEN 前端通过 `GET /admin/dashboard/stats` 请求 Dashboard 统计数据 THEN 系统 SHALL 在现有统计基础上新增以下字段: + - `intentRuleHitRate`: 意图规则命中率(命中次数/总对话次数) + - `intentRuleHitCount`: 意图规则总命中次数 + - `promptTemplateUsageCount`: Prompt 模板使用次数 + - `scriptFlowActivationCount`: 话术流程激活次数 + - `guardrailBlockCount`: 护栏拦截次数 + +- [AC-AISVC-92] WHEN 前端通过 `GET /admin/dashboard/stats` 请求并指定时间范围参数(`start_time`, `end_time`)THEN 系统 SHALL 返回该时间范围内的统计数据。 + +### 13.2 完整流程测试 API + +- [AC-AISVC-93] WHEN 前端通过 `POST /admin/test/flow-execution` 提交测试请求 THEN 系统 SHALL 执行完整的 12 步生成流程,并返回每一步的详细执行结果,包含: + - `step`: 步骤编号(1-12) + - `name`: 步骤名称 + - `status`: 执行状态(success/failed/skipped) + - `duration_ms`: 执行耗时(毫秒) + - `input`: 步骤输入数据 + - `output`: 步骤输出数据 + - `error`: 错误信息(如果失败) + - `metadata`: 步骤元数据(如命中的规则、使用的模板等) + +- [AC-AISVC-94] WHEN 测试请求包含对比配置参数(如不同的 Prompt 模板 ID、知识库 ID 列表)THEN 系统 SHALL 并行执行多个配置的测试,并返回对比结果数组。 + +- [AC-AISVC-95] WHEN 测试执行过程中某一步失败 THEN 系统 SHALL 记录失败原因,并继续执行后续步骤(尽力而为模式),最终返回完整的执行链路。 + +### 13.3 意图规则测试与监控 API + +- [AC-AISVC-96] WHEN 前端通过 `POST /admin/intent-rules/{ruleId}/test` 提交测试消息 THEN 系统 SHALL 返回测试结果: + - `matched`: 是否命中该规则(boolean) + - `matchedKeywords`: 匹配的关键词列表 + - `matchedPatterns`: 匹配的正则表达式列表 + - `priority`: 规则优先级 + - `priorityRank`: 在所有规则中的优先级排名 + - `conflictRules`: 同时命中的其他规则列表(优先级冲突检测) + - `reason`: 未命中原因(如果未命中) + +- [AC-AISVC-97] WHEN 前端通过 `GET /admin/monitoring/intent-rules` 查询意图规则监控统计 THEN 系统 SHALL 返回规则统计列表: + - `ruleId`: 规则 ID + - `ruleName`: 规则名称 + - `hitCount`: 命中次数 + - `hitRate`: 命中率(命中次数/总对话次数) + - `avgResponseTime`: 平均响应时间(毫秒) + - `lastHitTime`: 最近命中时间 + - `responseType`: 响应类型 + +- [AC-AISVC-98] WHEN 前端通过 `GET /admin/monitoring/intent-rules/{ruleId}/hits` 查询规则命中记录 THEN 系统 SHALL 返回该规则的详细命中记录列表(支持分页): + - `conversationId`: 对话 ID + - `sessionId`: 会话 ID + - `userMessage`: 用户消息 + - `matchedKeywords`: 匹配的关键词 + - `matchedPatterns`: 匹配的正则表达式 + - `responseType`: 响应类型 + - 
`executionResult`: 执行结果(成功/失败) + - `hitTime`: 命中时间 + +### 13.4 Prompt 模板测试与监控 API + +- [AC-AISVC-99] WHEN 前端通过 `POST /admin/prompt-templates/{tplId}/preview` 提交预览请求(含变量测试值)THEN 系统 SHALL 返回预览结果: + - `templateId`: 模板 ID + - `templateName`: 模板名称 + - `version`: 版本号 + - `rawContent`: 原始模板内容(含变量占位符) + - `variables`: 变量列表及当前值 + - `renderedContent`: 渲染后的完整 Prompt 内容 + - `estimatedTokens`: 预估 Token 数量 + +- [AC-AISVC-100] WHEN 前端通过 `GET /admin/monitoring/prompt-templates` 查询 Prompt 模板监控统计 THEN 系统 SHALL 返回模板统计列表: + - `templateId`: 模板 ID + - `templateName`: 模板名称 + - `scene`: 场景标签 + - `usageCount`: 使用次数 + - `avgTokens`: 平均 Token 消耗 + - `avgPromptTokens`: 平均 Prompt Token 消耗 + - `avgCompletionTokens`: 平均 Completion Token 消耗 + - `lastUsedTime`: 最近使用时间 + +### 13.5 话术流程测试与监控 API + +- [AC-AISVC-101] WHEN 前端通过 `POST /admin/script-flows/{flowId}/simulate` 提交模拟执行请求 THEN 系统 SHALL 创建模拟会话并返回首步话术: + - `simulationId`: 模拟会话 ID + - `flowId`: 流程 ID + - `flowName`: 流程名称 + - `currentStep`: 当前步骤编号 + - `stepContent`: 当前步骤话术内容 + - `waitForInput`: 是否等待用户输入 + - `nextConditions`: 下一步条件列表 + +- [AC-AISVC-102] WHEN 前端通过 `POST /admin/script-flows/{flowId}/simulate/{simulationId}/next` 提交用户模拟输入 THEN 系统 SHALL 根据下一步条件推进流程,并返回下一步话术或流程结束标志。 + +- [AC-AISVC-103] WHEN 前端通过 `GET /admin/monitoring/script-flows` 查询话术流程监控统计 THEN 系统 SHALL 返回流程统计列表: + - `flowId`: 流程 ID + - `flowName`: 流程名称 + - `activationCount`: 激活次数 + - `completionCount`: 完成次数 + - `completionRate`: 完成率(完成次数/激活次数) + - `avgCompletionTime`: 平均完成时长(秒) + - `interruptionCount`: 中断次数 + - `interruptionRate`: 中断率 + +- [AC-AISVC-104] WHEN 前端通过 `GET /admin/monitoring/script-flows/{flowId}/executions` 查询流程执行记录 THEN 系统 SHALL 返回该流程的详细执行记录列表(支持分页): + - `executionId`: 执行实例 ID + - `sessionId`: 会话 ID + - `activationTime`: 激活时间 + - `currentStep`: 当前步骤 + - `status`: 执行状态(in_progress/completed/interrupted) + - `interruptionReason`: 中断原因(如果中断) + - `completionTime`: 完成时间 + +### 13.6 输出护栏测试与监控 API + +- [AC-AISVC-105] WHEN 前端通过 `POST /admin/guardrails/test` 提交测试文本 THEN 系统 
SHALL 返回护栏检测结果: + - `originalText`: 原始文本 + - `processedText`: 处理后文本 + - `detectedWords`: 检测到的禁词列表(含词语、类别、位置、策略) + - `isBlocked`: 是否被拦截(boolean) + - `blockReason`: 拦截原因(如果被拦截) + +- [AC-AISVC-106] WHEN 前端通过 `GET /admin/monitoring/guardrails` 查询输出护栏监控统计 THEN 系统 SHALL 返回护栏统计列表: + - `wordId`: 禁词 ID + - `word`: 禁词内容 + - `category`: 类别 + - `blockCount`: 拦截次数 + - `replaceCount`: 替换次数 + - `maskCount`: 掩码次数 + - `lastBlockTime`: 最近拦截时间 + +- [AC-AISVC-107] WHEN 前端通过 `GET /admin/monitoring/guardrails/{wordId}/blocks` 查询禁词拦截记录 THEN 系统 SHALL 返回该禁词的详细拦截记录列表(支持分页): + - `blockId`: 拦截记录 ID + - `sessionId`: 会话 ID + - `originalText`: 原始文本 + - `processedText`: 处理后文本 + - `strategy`: 应用的策略(mask/replace/block) + - `blockTime`: 拦截时间 + +### 13.7 对话追踪 API + +- [AC-AISVC-108] WHEN 前端通过 `GET /admin/monitoring/conversations` 查询对话追踪列表 THEN 系统 SHALL 返回对话记录列表(支持分页和筛选): + - `conversationId`: 对话 ID + - `sessionId`: 会话 ID + - `userMessage`: 用户消息 + - `aiReply`: AI 回复 + - `triggeredRules`: 触发的意图规则列表 + - `usedTemplate`: 使用的 Prompt 模板 + - `usedFlow`: 使用的话术流程 + - `executionTime`: 执行耗时(毫秒) + - `createdAt`: 创建时间 + +- [AC-AISVC-109] WHEN 前端通过 `GET /admin/monitoring/conversations/{conversationId}` 查询对话执行链路详情 THEN 系统 SHALL 返回该对话的完整 12 步执行链路数据(与测试 API 返回格式一致)。 + +- [AC-AISVC-110] WHEN 前端通过 `POST /admin/monitoring/conversations/export` 提交导出请求(含筛选条件)THEN 系统 SHALL 生成导出文件(JSON/CSV 格式),并返回下载链接或直接返回文件流。 + +### 13.8 追踪映射(v0.7.0 迭代) + +| AC ID | Endpoint | 方法 | Operation | 描述 | +|-------|----------|------|-----------|------| +| AC-AISVC-91 | /admin/dashboard/stats | GET | getDashboardStats | Dashboard 统计增强 | +| AC-AISVC-92 | /admin/dashboard/stats | GET | getDashboardStats | 时间范围筛选 | +| AC-AISVC-93 | /admin/test/flow-execution | POST | testFlowExecution | 完整流程测试 | +| AC-AISVC-94 | /admin/test/flow-execution | POST | testFlowExecution | 对比测试 | +| AC-AISVC-95 | /admin/test/flow-execution | POST | testFlowExecution | 失败容错处理 | +| AC-AISVC-96 | /admin/intent-rules/{ruleId}/test | POST | testIntentRule | 意图规则测试 | +| 
AC-AISVC-97 | /admin/monitoring/intent-rules | GET | getIntentRuleStats | 意图规则监控统计 | +| AC-AISVC-98 | /admin/monitoring/intent-rules/{ruleId}/hits | GET | getIntentRuleHits | 规则命中记录 | +| AC-AISVC-99 | /admin/prompt-templates/{tplId}/preview | POST | previewPromptTemplate | Prompt 模板预览 | +| AC-AISVC-100 | /admin/monitoring/prompt-templates | GET | getPromptTemplateStats | Prompt 模板监控统计 | +| AC-AISVC-101 | /admin/script-flows/{flowId}/simulate | POST | simulateScriptFlow | 话术流程模拟执行 | +| AC-AISVC-102 | /admin/script-flows/{flowId}/simulate/{simulationId}/next | POST | simulateScriptFlowNext | 流程模拟推进 | +| AC-AISVC-103 | /admin/monitoring/script-flows | GET | getScriptFlowStats | 话术流程监控统计 | +| AC-AISVC-104 | /admin/monitoring/script-flows/{flowId}/executions | GET | getScriptFlowExecutions | 流程执行记录 | +| AC-AISVC-105 | /admin/guardrails/test | POST | testGuardrail | 输出护栏测试 | +| AC-AISVC-106 | /admin/monitoring/guardrails | GET | getGuardrailStats | 输出护栏监控统计 | +| AC-AISVC-107 | /admin/monitoring/guardrails/{wordId}/blocks | GET | getGuardrailBlocks | 禁词拦截记录 | +| AC-AISVC-108 | /admin/monitoring/conversations | GET | getConversations | 对话追踪列表 | +| AC-AISVC-109 | /admin/monitoring/conversations/{conversationId} | GET | getConversationDetail | 对话执行链路详情 | +| AC-AISVC-110 | /admin/monitoring/conversations/export | POST | exportConversations | 对话记录导出 | diff --git a/spec/ai-service/tasks.md b/spec/ai-service/tasks.md index 455bb00..8dcf390 100644 --- a/spec/ai-service/tasks.md +++ b/spec/ai-service/tasks.md @@ -2,7 +2,7 @@ feature_id: "AISVC" title: "Python AI 中台(ai-service)任务清单" status: "in-progress" -version: "0.6.0" +version: "0.7.0" last_updated: "2026-02-27" --- @@ -247,3 +247,170 @@ last_updated: "2026-02-27" - [ ] T15.6 预留 `AudioParser` 和 `VideoParser` 扩展点(仅接口定义,不实现) `[AC-AISVC-89]` - [ ] T15.7 预留对话记录结构化导入接口(仅接口定义,不实现) `[AC-AISVC-90]` - [ ] T15.8 编写 Orchestrator 升级集成测试:验证意图路由 → 流程引擎 → 多知识库检索 → 护栏过滤的完整链路 `[AC-AISVC-51~AC-AISVC-90]` + + +--- + +### Phase 16: 测试与监控系统(v0.7.0 
迭代) + +> 目标:为 v0.6.0 新增的四大功能提供完整的测试和监控 API,支持前端测试台和监控页面。 + +#### 16.1 监控数据模型与基础设施 + +- [ ] T16.1 定义监控数据实体:扩展 `ChatMessage` 实体,新增 `prompt_template_id`、`intent_rule_id`、`flow_instance_id`、`guardrail_triggered`、`guardrail_words` 字段 `[AC-AISVC-91, AC-AISVC-92]` + +- [ ] T16.2 实现 Redis 统计缓存层:创建 `MonitoringCache` 服务,支持实时计数器和 Top N 排行榜缓存 `[AC-AISVC-91, AC-AISVC-92]` + +- [ ] T16.3 实现 `FlowTestRecord` 实体:用于存储完整流程测试的详细日志(保留 7 天) `[AC-AISVC-93, AC-AISVC-94, AC-AISVC-95]` + +- [ ] T16.4 实现 `ExportTask` 实体:用于管理对话导出任务的异步处理 `[AC-AISVC-110]` + +- [ ] T16.5 创建监控数据库索引:优化查询性能(按 tenant_id、created_at、intent_rule_id、flow_instance_id 等) `[AC-AISVC-91~AC-AISVC-110]` + +#### 16.2 Dashboard 统计增强 + +- [ ] T16.6 实现 `DashboardService.get_enhanced_stats()`:聚合意图规则、Prompt 模板、话术流程、护栏的统计数据 `[AC-AISVC-91]` + +- [ ] T16.7 实现统计数据缓存策略:Dashboard 统计结果缓存 60 秒,Top N 排行榜每 5 分钟预计算 `[AC-AISVC-91]` + +- [ ] T16.8 实现 `GET /admin/dashboard/stats` API 增强:支持时间范围筛选(`start_time`/`end_time` 参数,与 AC-AISVC-92 保持一致) `[AC-AISVC-91, AC-AISVC-92]` + +#### 16.3 完整流程测试台 + +- [ ] T16.9 增强 `Orchestrator.generate_with_monitoring()`:支持可选的详细日志记录(enable_detailed_log 参数) `[AC-AISVC-93]` + +- [ ] T16.10 实现 12 步流程埋点:在 Orchestrator 的每一步中记录输入/输出/耗时/错误(仅在测试模式下) `[AC-AISVC-93, AC-AISVC-94, AC-AISVC-95]` + +- [ ] T16.11 实现 `FlowTestService.test_flow_execution()`:调用增强版 Orchestrator 并保存测试记录 `[AC-AISVC-93]` + +- [ ] T16.12 实现 `POST /admin/test/flow-execution` API:支持完整流程测试,返回 12 步执行详情 `[AC-AISVC-93, AC-AISVC-94, AC-AISVC-95]` + +#### 16.4 意图规则测试与监控 + +- [ ] T16.13 实现 `IntentRuleTester.test_rule()`:测试单个规则并检测优先级冲突 `[AC-AISVC-96]` + +- [ ] T16.14 实现 `POST /admin/intent-rules/{ruleId}/test` API:返回测试结果和冲突检测 `[AC-AISVC-96]` + +- [ ] T16.15 实现 `IntentMonitor.get_rule_stats()`:聚合规则命中统计(命中次数、命中率、平均响应时间) `[AC-AISVC-97]` + +- [ ] T16.16 实现 `GET /admin/monitoring/intent-rules` API:返回规则统计列表,支持时间范围筛选 `[AC-AISVC-97]` + +- [ ] T16.17 实现 `GET /admin/monitoring/intent-rules/{ruleId}/hits` API:返回规则命中记录详情,支持分页 `[AC-AISVC-98]` + +#### 16.5 Prompt 模板测试与监控 + +- [ ] T16.18 实现 
`PromptTemplateMonitor.preview_template()`:渲染模板并计算 Token 数量(使用 tiktoken) `[AC-AISVC-99]` + +- [ ] T16.19 实现 `POST /admin/prompt-templates/{tplId}/preview` API:返回渲染结果和 Token 统计 `[AC-AISVC-99]` + +- [ ] T16.20 实现 `PromptMonitor.get_template_stats()`:聚合模板使用统计(使用次数、平均 Token 消耗) `[AC-AISVC-100]` + +- [ ] T16.21 实现 `GET /admin/monitoring/prompt-templates` API:返回模板统计列表,支持按场景筛选 `[AC-AISVC-100]` + +#### 16.6 话术流程测试与监控 + +- [ ] T16.22 实现 `ScriptFlowTester.simulate_flow()`:模拟流程执行并分析覆盖率 `[AC-AISVC-101]` + +- [ ] T16.23 实现 `POST /admin/script-flows/{flowId}/simulate` API:创建模拟会话并返回首步话术 `[AC-AISVC-101]` + +- [ ] T16.24 实现 `POST /admin/script-flows/{flowId}/simulate/{simulationId}/next` API:推进模拟流程 `[AC-AISVC-102]` + +- [ ] T16.25 实现 `FlowMonitor.get_flow_stats()`:聚合流程激活统计(激活次数、完成率、平均完成时长) `[AC-AISVC-103]` + +- [ ] T16.26 实现 `GET /admin/monitoring/script-flows` API:返回流程统计列表 `[AC-AISVC-103]` + +- [ ] T16.27 实现 `GET /admin/monitoring/script-flows/{flowId}/executions` API:返回流程执行记录详情,支持分页 `[AC-AISVC-104]` + +#### 16.7 输出护栏测试与监控 + +- [ ] T16.28 实现 `GuardrailTester.test_guardrail()`:测试禁词检测和过滤策略 `[AC-AISVC-105]` + +- [ ] T16.29 实现 `POST /admin/guardrails/test` API:返回护栏检测结果 `[AC-AISVC-105]` + +- [ ] T16.30 实现 `GuardrailMonitor.get_guardrail_stats()`:聚合护栏拦截统计(拦截次数、替换次数、掩码次数) `[AC-AISVC-106]` + +- [ ] T16.31 实现 `GET /admin/monitoring/guardrails` API:返回护栏统计列表,支持按类别筛选 `[AC-AISVC-106]` + +- [ ] T16.32 实现 `GET /admin/monitoring/guardrails/{wordId}/blocks` API:返回禁词拦截记录详情,支持分页 `[AC-AISVC-107]` + +#### 16.8 对话追踪与导出 + +- [ ] T16.33 实现 `ConversationTracker.get_conversations()`:查询对话记录列表,支持多条件筛选和分页 `[AC-AISVC-108]` + +- [ ] T16.34 实现 `GET /admin/monitoring/conversations` API:返回对话记录列表 `[AC-AISVC-108]` + +- [ ] T16.35 实现 `ConversationTracker.get_conversation_detail()`:查询对话的完整 12 步执行链路 `[AC-AISVC-109]` + +- [ ] T16.36 实现 `GET /admin/monitoring/conversations/{conversationId}` API:返回对话执行链路详情 `[AC-AISVC-109]` + +- [ ] T16.37 实现 `ConversationExporter.export_conversations()`:异步导出对话数据(CSV/JSON 格式) 
`[AC-AISVC-110]` + +- [ ] T16.38 实现 `POST /admin/monitoring/conversations/export` API:创建导出任务并返回任务 ID `[AC-AISVC-110]` + +- [ ] T16.39 实现 `GET /admin/monitoring/conversations/export/{exportId}` API:查询导出任务状态和下载链接 `[AC-AISVC-110]` + +#### 16.9 性能优化与测试 + +- [ ] T16.40 实现监控数据异步更新:对话结束时异步更新统计计数器,不阻塞响应 `[AC-AISVC-91, AC-AISVC-92]` + +- [ ] T16.41 实现测试日志自动清理:定时任务清理 7 天前的 `FlowTestRecord` 数据 `[AC-AISVC-93]` + +- [ ] T16.42 实现导出文件自动清理:定时任务清理超过 24 小时的过期导出文件 `[AC-AISVC-110]` + +- [ ] T16.43 编写监控服务单元测试:覆盖统计聚合、缓存策略、数据导出等核心逻辑 `[AC-AISVC-91~AC-AISVC-110]` + +- [ ] T16.44 编写测试 API 集成测试:验证完整流程测试、规则测试、模板预览等功能 `[AC-AISVC-93~AC-AISVC-105]` + +- [ ] T16.45 性能测试:验证监控埋点对生产环境性能影响 <5%,Dashboard 统计查询 <500ms `[AC-AISVC-91, AC-AISVC-92]` + +--- + +## Phase 16 任务进度追踪 + +| 任务 | 描述 | 状态 | +|------|------|------| +| T16.1 | 监控数据实体扩展 | ⏳ 待处理 | +| T16.2 | Redis 统计缓存层 | ⏳ 待处理 | +| T16.3 | FlowTestRecord 实体 | ⏳ 待处理 | +| T16.4 | ExportTask 实体 | ⏳ 待处理 | +| T16.5 | 监控数据库索引 | ⏳ 待处理 | +| T16.6 | Dashboard 统计聚合 | ⏳ 待处理 | +| T16.7 | 统计缓存策略 | ⏳ 待处理 | +| T16.8 | Dashboard API 增强 | ⏳ 待处理 | +| T16.9 | Orchestrator 监控增强 | ⏳ 待处理 | +| T16.10 | 12 步流程埋点 | ⏳ 待处理 | +| T16.11 | FlowTestService | ⏳ 待处理 | +| T16.12 | 完整流程测试 API | ⏳ 待处理 | +| T16.13 | IntentRuleTester | ⏳ 待处理 | +| T16.14 | 意图规则测试 API | ⏳ 待处理 | +| T16.15 | IntentMonitor | ⏳ 待处理 | +| T16.16 | 意图规则监控 API | ⏳ 待处理 | +| T16.17 | 规则命中记录 API | ⏳ 待处理 | +| T16.18 | PromptTemplateMonitor | ⏳ 待处理 | +| T16.19 | 模板预览 API | ⏳ 待处理 | +| T16.20 | PromptMonitor | ⏳ 待处理 | +| T16.21 | 模板监控 API | ⏳ 待处理 | +| T16.22 | ScriptFlowTester | ⏳ 待处理 | +| T16.23 | 流程模拟 API | ⏳ 待处理 | +| T16.24 | 流程推进 API | ⏳ 待处理 | +| T16.25 | FlowMonitor | ⏳ 待处理 | +| T16.26 | 流程监控 API | ⏳ 待处理 | +| T16.27 | 流程执行记录 API | ⏳ 待处理 | +| T16.28 | GuardrailTester | ⏳ 待处理 | +| T16.29 | 护栏测试 API | ⏳ 待处理 | +| T16.30 | GuardrailMonitor | ⏳ 待处理 | +| T16.31 | 护栏监控 API | ⏳ 待处理 | +| T16.32 | 护栏拦截记录 API | ⏳ 待处理 | +| T16.33 | ConversationTracker | ⏳ 待处理 | +| T16.34 | 对话追踪列表 API | ⏳ 待处理 | +| T16.35 | 对话详情查询 | ⏳ 待处理 | +| T16.36 
| 对话详情 API | ⏳ 待处理 | +| T16.37 | ConversationExporter | ⏳ 待处理 | +| T16.38 | 对话导出 API | ⏳ 待处理 | +| T16.39 | 导出状态查询 API | ⏳ 待处理 | +| T16.40 | 异步统计更新 | ⏳ 待处理 | +| T16.41 | 测试日志清理 | ⏳ 待处理 | +| T16.42 | 导出文件清理 | ⏳ 待处理 | +| T16.43 | 监控服务单元测试 | ⏳ 待处理 | +| T16.44 | 测试 API 集成测试 | ⏳ 待处理 | +| T16.45 | 性能测试 | ⏳ 待处理 |