From fc53fdc6ace86a7f68733389f8a6a96c1caa06de Mon Sep 17 00:00:00 2001 From: MerCry Date: Wed, 25 Feb 2026 01:25:53 +0800 Subject: [PATCH] =?UTF-8?q?feat(AISVC-T8):=20LLM=E9=85=8D=E7=BD=AE?= =?UTF-8?q?=E7=AE=A1=E7=90=86=E4=B8=8ERAG=E8=B0=83=E8=AF=95=E8=BE=93?= =?UTF-8?q?=E5=87=BA=E6=94=AF=E6=8C=81=20[AC-AISVC-42,=20AC-AISVC-43,=20AC?= =?UTF-8?q?-AISVC-44,=20AC-AISVC-45,=20AC-AISVC-46,=20AC-AISVC-47,=20AC-AI?= =?UTF-8?q?SVC-48,=20AC-AISVC-49,=20AC-AISVC-50]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 新增 LLMProviderFactory 工厂类支持 OpenAI/Ollama/Azure [AC-AISVC-42] - 新增 LLMConfigManager 支持配置热更新 [AC-AISVC-43, AC-AISVC-44] - 新增 LLM 管理 API 端点 [AC-AISVC-42~AC-AISVC-46] - 更新 RAG 实验接口支持 AI 回复生成 [AC-AISVC-47, AC-AISVC-49] - 新增 RAG 实验流式输出 SSE [AC-AISVC-48] - 支持指定 LLM 提供者 [AC-AISVC-50] - 更新 OpenAPI 契约添加 LLM 管理接口 - 更新前后端规范文档 v0.4.0 迭代 --- ai-service-admin/src/App.vue | 1 + ai-service-admin/src/api/dashboard.ts | 3 - ai-service-admin/src/api/embedding.ts | 10 +- ai-service-admin/src/api/kb.ts | 15 - ai-service-admin/src/api/monitoring.ts | 6 - ai-service-admin/src/api/rag.ts | 3 - .../embedding/EmbeddingProviderSelect.vue | 73 +++ .../embedding/EmbeddingTestPanel.vue | 428 ++++++++++--- .../components/embedding/SupportedFormats.vue | 161 +++++ ai-service-admin/src/router/index.ts | 6 + ai-service-admin/src/stores/embedding.ts | 164 +++++ ai-service-admin/src/types/embedding.ts | 49 ++ .../src/views/admin/embedding/index.vue | 504 +++++++++++++++ ai-service/app/api/admin/__init__.py | 3 +- ai-service/app/api/admin/llm.py | 146 +++++ ai-service/app/api/admin/rag.py | 214 ++++++- ai-service/app/main.py | 3 +- ai-service/app/services/llm/factory.py | 332 ++++++++++ ai-service/pyproject.toml | 4 + ai-service/scripts/check_qdrant.py | 80 +++ ai-service/scripts/cleanup_garbage.py | 115 ++++ ai-service/scripts/test_excel_parse.py | 40 ++ docs/progress/ai-service-admin-progress.md | 105 ++- spec/ai-service-admin/openapi.deps.yaml | 598 
+++++++++++++++++- spec/ai-service-admin/requirements.md | 91 ++- spec/ai-service-admin/tasks.md | 103 ++- spec/ai-service/openapi.provider.yaml | 429 +++++++++++++ spec/ai-service/progress.md | 62 +- spec/ai-service/requirements.md | 46 +- spec/ai-service/tasks.md | 21 +- test-doc.txt | 18 + 31 files changed, 3613 insertions(+), 220 deletions(-) create mode 100644 ai-service-admin/src/components/embedding/EmbeddingProviderSelect.vue create mode 100644 ai-service-admin/src/components/embedding/SupportedFormats.vue create mode 100644 ai-service-admin/src/stores/embedding.ts create mode 100644 ai-service-admin/src/types/embedding.ts create mode 100644 ai-service-admin/src/views/admin/embedding/index.vue create mode 100644 ai-service/app/api/admin/llm.py create mode 100644 ai-service/app/services/llm/factory.py create mode 100644 ai-service/scripts/check_qdrant.py create mode 100644 ai-service/scripts/cleanup_garbage.py create mode 100644 ai-service/scripts/test_excel_parse.py create mode 100644 test-doc.txt diff --git a/ai-service-admin/src/App.vue b/ai-service-admin/src/App.vue index bfe0572..a76a199 100644 --- a/ai-service-admin/src/App.vue +++ b/ai-service-admin/src/App.vue @@ -10,6 +10,7 @@ 知识库管理 RAG 实验室 会话监控 + 嵌入模型配置
diff --git a/ai-service-admin/src/api/dashboard.ts b/ai-service-admin/src/api/dashboard.ts index 3fee013..0d7ef78 100644 --- a/ai-service-admin/src/api/dashboard.ts +++ b/ai-service-admin/src/api/dashboard.ts @@ -1,8 +1,5 @@ import request from '@/utils/request' -/** - * 获取 Dashboard 统计数据 - */ export function getDashboardStats() { return request({ url: '/admin/dashboard/stats', diff --git a/ai-service-admin/src/api/embedding.ts b/ai-service-admin/src/api/embedding.ts index 7c6094e..418dc1e 100644 --- a/ai-service-admin/src/api/embedding.ts +++ b/ai-service-admin/src/api/embedding.ts @@ -52,21 +52,21 @@ export interface SupportedFormatsResponse { export function getProviders() { return request({ - url: '/admin/embedding/providers', + url: '/embedding/providers', method: 'get' }) } export function getConfig() { return request({ - url: '/admin/embedding/config', + url: '/embedding/config', method: 'get' }) } export function saveConfig(data: EmbeddingConfigUpdate) { return request({ - url: '/admin/embedding/config', + url: '/embedding/config', method: 'put', data }) @@ -74,7 +74,7 @@ export function saveConfig(data: EmbeddingConfigUpdate) { export function testEmbedding(data: EmbeddingTestRequest): Promise { return request({ - url: '/admin/embedding/test', + url: '/embedding/test', method: 'post', data }) @@ -82,7 +82,7 @@ export function testEmbedding(data: EmbeddingTestRequest): Promise + + +
+ {{ provider.display_name }} + {{ provider.description }} +
+
+
+ + + + + diff --git a/ai-service-admin/src/components/embedding/EmbeddingTestPanel.vue b/ai-service-admin/src/components/embedding/EmbeddingTestPanel.vue index 6136a71..3b0ae02 100644 --- a/ai-service-admin/src/components/embedding/EmbeddingTestPanel.vue +++ b/ai-service-admin/src/components/embedding/EmbeddingTestPanel.vue @@ -1,90 +1,111 @@ + + diff --git a/ai-service-admin/src/router/index.ts b/ai-service-admin/src/router/index.ts index 1bd5d32..6863b47 100644 --- a/ai-service-admin/src/router/index.ts +++ b/ai-service-admin/src/router/index.ts @@ -28,6 +28,12 @@ const routes: Array = [ name: 'Monitoring', component: () => import('@/views/monitoring/index.vue'), meta: { title: '会话监控' } + }, + { + path: '/admin/embedding', + name: 'EmbeddingConfig', + component: () => import('@/views/admin/embedding/index.vue'), + meta: { title: '嵌入模型配置' } } ] diff --git a/ai-service-admin/src/stores/embedding.ts b/ai-service-admin/src/stores/embedding.ts new file mode 100644 index 0000000..2cfb0ea --- /dev/null +++ b/ai-service-admin/src/stores/embedding.ts @@ -0,0 +1,164 @@ +import { defineStore } from 'pinia' +import { ref, computed } from 'vue' +import { + getProviders, + getConfig, + saveConfig, + testEmbedding, + getSupportedFormats, + type EmbeddingProviderInfo, + type EmbeddingConfig, + type EmbeddingConfigUpdate, + type EmbeddingTestResult, + type DocumentFormat +} from '@/api/embedding' + +export const useEmbeddingStore = defineStore('embedding', () => { + const providers = ref([]) + const currentConfig = ref({ + provider: '', + config: {} + }) + const formats = ref([]) + const loading = ref(false) + const providersLoading = ref(false) + const formatsLoading = ref(false) + const testResult = ref(null) + const testLoading = ref(false) + + const currentProvider = computed(() => { + return providers.value.find(p => p.name === currentConfig.value.provider) + }) + + const configSchema = computed(() => { + return currentProvider.value?.config_schema || { properties: {} } + 
}) + + const loadProviders = async () => { + providersLoading.value = true + try { + const res: any = await getProviders() + providers.value = res?.providers || res?.data?.providers || [] + } catch (error) { + console.error('Failed to load providers:', error) + throw error + } finally { + providersLoading.value = false + } + } + + const loadConfig = async () => { + loading.value = true + try { + const res: any = await getConfig() + const config = res?.data || res + if (config) { + currentConfig.value = { + provider: config.provider || '', + config: config.config || {}, + updated_at: config.updated_at + } + } + } catch (error) { + console.error('Failed to load config:', error) + throw error + } finally { + loading.value = false + } + } + + const saveCurrentConfig = async () => { + loading.value = true + try { + const updateData: EmbeddingConfigUpdate = { + provider: currentConfig.value.provider, + config: currentConfig.value.config + } + await saveConfig(updateData) + } catch (error) { + console.error('Failed to save config:', error) + throw error + } finally { + loading.value = false + } + } + + const runTest = async (testText?: string) => { + testLoading.value = true + testResult.value = null + try { + const result = await testEmbedding({ + test_text: testText, + config: { + provider: currentConfig.value.provider, + config: currentConfig.value.config + } + }) + testResult.value = result + } catch (error: any) { + testResult.value = { + success: false, + dimension: 0, + error: error?.message || '连接测试失败' + } + } finally { + testLoading.value = false + } + } + + const loadFormats = async () => { + formatsLoading.value = true + try { + const res: any = await getSupportedFormats() + formats.value = res?.formats || res?.data?.formats || [] + } catch (error) { + console.error('Failed to load formats:', error) + throw error + } finally { + formatsLoading.value = false + } + } + + const setProvider = (providerName: string) => { + currentConfig.value.provider = providerName 
+ const provider = providers.value.find(p => p.name === providerName) + if (provider?.config_schema?.properties) { + const newConfig: Record = {} + Object.entries(provider.config_schema.properties).forEach(([key, field]: [string, any]) => { + newConfig[key] = field.default !== undefined ? field.default : '' + }) + currentConfig.value.config = newConfig + } else { + currentConfig.value.config = {} + } + } + + const updateConfigValue = (key: string, value: any) => { + currentConfig.value.config[key] = value + } + + const clearTestResult = () => { + testResult.value = null + } + + return { + providers, + currentConfig, + formats, + loading, + providersLoading, + formatsLoading, + testResult, + testLoading, + currentProvider, + configSchema, + loadProviders, + loadConfig, + saveCurrentConfig, + runTest, + loadFormats, + setProvider, + updateConfigValue, + clearTestResult + } +}) diff --git a/ai-service-admin/src/types/embedding.ts b/ai-service-admin/src/types/embedding.ts new file mode 100644 index 0000000..993f994 --- /dev/null +++ b/ai-service-admin/src/types/embedding.ts @@ -0,0 +1,49 @@ +export interface EmbeddingProviderInfo { + name: string + display_name: string + description?: string + config_schema: Record +} + +export interface EmbeddingConfig { + provider: string + config: Record + updated_at?: string +} + +export interface EmbeddingConfigUpdate { + provider: string + config?: Record +} + +export interface EmbeddingTestResult { + success: boolean + dimension: number + latency_ms?: number + message?: string + error?: string +} + +export interface DocumentFormat { + extension: string + name: string + description?: string +} + +export interface EmbeddingProvidersResponse { + providers: EmbeddingProviderInfo[] +} + +export interface EmbeddingConfigUpdateResponse { + success: boolean + message: string +} + +export interface SupportedFormatsResponse { + formats: DocumentFormat[] +} + +export interface EmbeddingTestRequest { + test_text?: string + config?: 
EmbeddingConfigUpdate +} diff --git a/ai-service-admin/src/views/admin/embedding/index.vue b/ai-service-admin/src/views/admin/embedding/index.vue new file mode 100644 index 0000000..d3f6d6a --- /dev/null +++ b/ai-service-admin/src/views/admin/embedding/index.vue @@ -0,0 +1,504 @@ + + + + + diff --git a/ai-service/app/api/admin/__init__.py b/ai-service/app/api/admin/__init__.py index 0ec350d..7582480 100644 --- a/ai-service/app/api/admin/__init__.py +++ b/ai-service/app/api/admin/__init__.py @@ -6,7 +6,8 @@ Admin API routes for AI Service management. from app.api.admin.dashboard import router as dashboard_router from app.api.admin.embedding import router as embedding_router from app.api.admin.kb import router as kb_router +from app.api.admin.llm import router as llm_router from app.api.admin.rag import router as rag_router from app.api.admin.sessions import router as sessions_router -__all__ = ["dashboard_router", "embedding_router", "kb_router", "rag_router", "sessions_router"] +__all__ = ["dashboard_router", "embedding_router", "kb_router", "llm_router", "rag_router", "sessions_router"] diff --git a/ai-service/app/api/admin/llm.py b/ai-service/app/api/admin/llm.py new file mode 100644 index 0000000..1556416 --- /dev/null +++ b/ai-service/app/api/admin/llm.py @@ -0,0 +1,146 @@ +""" +LLM Configuration Management API. +[AC-ASA-14, AC-ASA-15, AC-ASA-16, AC-ASA-17, AC-ASA-18] LLM provider management endpoints. +""" + +import logging +from typing import Any + +from fastapi import APIRouter, Request + +from app.core.tenant import get_tenant_id +from app.services.llm.factory import ( + LLMConfigManager, + LLMProviderFactory, + get_llm_config_manager, +) + +logger = logging.getLogger(__name__) + +router = APIRouter(prefix="/admin/llm", tags=["LLM Management"]) + + +@router.get("/providers") +async def list_providers(request: Request) -> dict[str, Any]: + """ + List all available LLM providers. + [AC-ASA-15] Returns provider list with configuration schemas. 
+ """ + tenant_id = get_tenant_id(request) + logger.info(f"[AC-ASA-15] Listing LLM providers for tenant={tenant_id}") + + providers = LLMProviderFactory.get_providers() + return { + "providers": [ + { + "name": p.name, + "display_name": p.display_name, + "description": p.description, + "config_schema": p.config_schema, + } + for p in providers + ], + } + + +@router.get("/config") +async def get_config(request: Request) -> dict[str, Any]: + """ + Get current LLM configuration. + [AC-ASA-14] Returns current provider and config. + """ + tenant_id = get_tenant_id(request) + logger.info(f"[AC-ASA-14] Getting LLM config for tenant={tenant_id}") + + manager = get_llm_config_manager() + config = manager.get_current_config() + + masked_config = _mask_secrets(config.get("config", {})) + + return { + "provider": config["provider"], + "config": masked_config, + } + + +@router.put("/config") +async def update_config( + request: Request, + body: dict[str, Any], +) -> dict[str, Any]: + """ + Update LLM configuration. + [AC-ASA-16] Updates provider and config with validation. + """ + tenant_id = get_tenant_id(request) + provider = body.get("provider") + config = body.get("config", {}) + + logger.info(f"[AC-ASA-16] Updating LLM config for tenant={tenant_id}, provider={provider}") + + if not provider: + return { + "success": False, + "message": "Provider is required", + } + + try: + manager = get_llm_config_manager() + await manager.update_config(provider, config) + + return { + "success": True, + "message": f"LLM configuration updated to {provider}", + } + + except ValueError as e: + logger.error(f"[AC-ASA-16] Invalid LLM config: {e}") + return { + "success": False, + "message": str(e), + } + + +@router.post("/test") +async def test_connection( + request: Request, + body: dict[str, Any] | None = None, +) -> dict[str, Any]: + """ + Test LLM connection. + [AC-ASA-17, AC-ASA-18] Tests connection and returns response. 
+ """ + tenant_id = get_tenant_id(request) + body = body or {} + + test_prompt = body.get("test_prompt", "你好,请简单介绍一下自己。") + provider = body.get("provider") + config = body.get("config") + + logger.info( + f"[AC-ASA-17] Testing LLM connection for tenant={tenant_id}, " + f"provider={provider or 'current'}" + ) + + manager = get_llm_config_manager() + result = await manager.test_connection( + test_prompt=test_prompt, + provider=provider, + config=config, + ) + + return result + + +def _mask_secrets(config: dict[str, Any]) -> dict[str, Any]: + """Mask secret fields in config for display.""" + masked = {} + for key, value in config.items(): + if key in ("api_key", "password", "secret"): + if value: + masked[key] = f"{str(value)[:4]}***" + else: + masked[key] = "" + else: + masked[key] = value + return masked diff --git a/ai-service/app/api/admin/rag.py b/ai-service/app/api/admin/rag.py index 5a75bb4..048228b 100644 --- a/ai-service/app/api/admin/rag.py +++ b/ai-service/app/api/admin/rag.py @@ -1,24 +1,24 @@ """ RAG Lab endpoints for debugging and experimentation. -[AC-ASA-05] RAG experiment debugging with retrieval results and prompt visualization. +[AC-ASA-05, AC-ASA-19, AC-ASA-20, AC-ASA-21, AC-ASA-22] RAG experiment with AI output. 
""" +import json import logging +import time from typing import Annotated, Any, List from fastapi import APIRouter, Depends, Body -from fastapi.responses import JSONResponse +from fastapi.responses import JSONResponse, StreamingResponse from pydantic import BaseModel, Field -from sqlalchemy.ext.asyncio import AsyncSession from app.core.config import get_settings -from app.core.database import get_session from app.core.exceptions import MissingTenantIdException from app.core.tenant import get_tenant_id -from app.core.qdrant_client import get_qdrant_client from app.models import ErrorResponse from app.services.retrieval.vector_retriever import get_vector_retriever from app.services.retrieval.base import RetrievalContext +from app.services.llm.factory import get_llm_config_manager logger = logging.getLogger(__name__) @@ -36,16 +36,37 @@ def get_current_tenant_id() -> str: class RAGExperimentRequest(BaseModel): query: str = Field(..., description="Query text for retrieval") kb_ids: List[str] | None = Field(default=None, description="Knowledge base IDs to search") - params: dict[str, Any] | None = Field(default=None, description="Retrieval parameters") + top_k: int = Field(default=5, description="Number of results to retrieve") + score_threshold: float = Field(default=0.5, description="Minimum similarity score") + generate_response: bool = Field(default=True, description="Whether to generate AI response") + llm_provider: str | None = Field(default=None, description="Specific LLM provider to use") + + +class AIResponse(BaseModel): + content: str + prompt_tokens: int = 0 + completion_tokens: int = 0 + total_tokens: int = 0 + latency_ms: float = 0 + model: str = "" + + +class RAGExperimentResult(BaseModel): + query: str + retrieval_results: List[dict] = [] + final_prompt: str = "" + ai_response: AIResponse | None = None + total_latency_ms: float = 0 + diagnostics: dict[str, Any] = {} @router.post( "/experiments/run", operation_id="runRagExperiment", - summary="Run RAG 
debugging experiment", - description="[AC-ASA-05] Trigger RAG experiment with retrieval and prompt generation.", + summary="Run RAG debugging experiment with AI output", + description="[AC-ASA-05, AC-ASA-19, AC-ASA-21, AC-ASA-22] Trigger RAG experiment with retrieval, prompt generation, and AI response.", responses={ - 200: {"description": "Experiment results with retrieval and prompt"}, + 200: {"description": "Experiment results with retrieval, prompt, and AI response"}, 401: {"description": "Unauthorized", "model": ErrorResponse}, 403: {"description": "Forbidden", "model": ErrorResponse}, }, @@ -55,18 +76,19 @@ async def run_rag_experiment( request: RAGExperimentRequest = Body(...), ) -> JSONResponse: """ - [AC-ASA-05] Run RAG experiment and return retrieval results with final prompt. + [AC-ASA-05, AC-ASA-19, AC-ASA-21, AC-ASA-22] Run RAG experiment and return retrieval results with AI response. """ + start_time = time.time() + logger.info( f"[AC-ASA-05] Running RAG experiment: tenant={tenant_id}, " - f"query={request.query[:50]}..., kb_ids={request.kb_ids}" + f"query={request.query[:50]}..., kb_ids={request.kb_ids}, " + f"generate_response={request.generate_response}" ) settings = get_settings() - - params = request.params or {} - top_k = params.get("topK", settings.rag_top_k) - threshold = params.get("threshold", settings.rag_score_threshold) + top_k = request.top_k or settings.rag_top_k + threshold = request.score_threshold or settings.rag_score_threshold try: retriever = await get_vector_retriever() @@ -94,14 +116,26 @@ async def run_rag_experiment( final_prompt = _build_final_prompt(request.query, retrieval_results) logger.info( - f"[AC-ASA-05] RAG experiment complete: hits={len(retrieval_results)}, " + f"[AC-ASA-05] RAG retrieval complete: hits={len(retrieval_results)}, " f"max_score={result.max_score:.3f}" ) + ai_response = None + if request.generate_response: + ai_response = await _generate_ai_response( + final_prompt, + provider=request.llm_provider, + ) 
+ + total_latency_ms = (time.time() - start_time) * 1000 + return JSONResponse( content={ - "retrievalResults": retrieval_results, - "finalPrompt": final_prompt, + "query": request.query, + "retrieval_results": retrieval_results, + "final_prompt": final_prompt, + "ai_response": ai_response.model_dump() if ai_response else None, + "total_latency_ms": round(total_latency_ms, 2), "diagnostics": result.diagnostics, } ) @@ -112,10 +146,22 @@ async def run_rag_experiment( fallback_results = _get_fallback_results(request.query) fallback_prompt = _build_final_prompt(request.query, fallback_results) + ai_response = None + if request.generate_response: + ai_response = await _generate_ai_response( + fallback_prompt, + provider=request.llm_provider, + ) + + total_latency_ms = (time.time() - start_time) * 1000 + return JSONResponse( content={ - "retrievalResults": fallback_results, - "finalPrompt": fallback_prompt, + "query": request.query, + "retrieval_results": fallback_results, + "final_prompt": fallback_prompt, + "ai_response": ai_response.model_dump() if ai_response else None, + "total_latency_ms": round(total_latency_ms, 2), "diagnostics": { "error": str(e), "fallback": True, @@ -124,6 +170,130 @@ async def run_rag_experiment( ) +@router.post( + "/experiments/stream", + operation_id="runRagExperimentStream", + summary="Run RAG experiment with streaming AI output", + description="[AC-ASA-20] Trigger RAG experiment with SSE streaming for AI response.", + responses={ + 200: {"description": "SSE stream with retrieval results and AI response"}, + 401: {"description": "Unauthorized", "model": ErrorResponse}, + 403: {"description": "Forbidden", "model": ErrorResponse}, + }, +) +async def run_rag_experiment_stream( + tenant_id: Annotated[str, Depends(get_current_tenant_id)], + request: RAGExperimentRequest = Body(...), +) -> StreamingResponse: + """ + [AC-ASA-20] Run RAG experiment with SSE streaming for AI response. 
+ """ + logger.info( + f"[AC-ASA-20] Running RAG experiment stream: tenant={tenant_id}, " + f"query={request.query[:50]}..." + ) + + settings = get_settings() + top_k = request.top_k or settings.rag_top_k + threshold = request.score_threshold or settings.rag_score_threshold + + async def event_generator(): + try: + retriever = await get_vector_retriever() + + retrieval_ctx = RetrievalContext( + tenant_id=tenant_id, + query=request.query, + session_id="rag_experiment_stream", + channel_type="admin", + metadata={"kb_ids": request.kb_ids}, + ) + + result = await retriever.retrieve(retrieval_ctx) + + retrieval_results = [ + { + "content": hit.text, + "score": hit.score, + "source": hit.source, + "metadata": hit.metadata, + } + for hit in result.hits + ] + + final_prompt = _build_final_prompt(request.query, retrieval_results) + + yield f"event: retrieval\ndata: {json.dumps({'results': retrieval_results, 'count': len(retrieval_results)})}\n\n" + + yield f"event: prompt\ndata: {json.dumps({'prompt': final_prompt})}\n\n" + + if request.generate_response: + manager = get_llm_config_manager() + client = manager.get_client() + + full_content = "" + async for chunk in client.stream_generate( + messages=[{"role": "user", "content": final_prompt}], + ): + if chunk.delta: + full_content += chunk.delta + yield f"event: message\ndata: {json.dumps({'delta': chunk.delta})}\n\n" + + yield f"event: final\ndata: {json.dumps({'content': full_content, 'finish_reason': 'stop'})}\n\n" + else: + yield f"event: final\ndata: {json.dumps({'content': '', 'finish_reason': 'skipped'})}\n\n" + + except Exception as e: + logger.error(f"[AC-ASA-20] RAG experiment stream failed: {e}") + yield f"event: error\ndata: {json.dumps({'error': str(e)})}\n\n" + + return StreamingResponse( + event_generator(), + media_type="text/event-stream", + headers={ + "Cache-Control": "no-cache", + "Connection": "keep-alive", + "X-Accel-Buffering": "no", + }, + ) + + +async def _generate_ai_response( + prompt: str, + 
provider: str | None = None, +) -> AIResponse | None: + """ + [AC-ASA-19, AC-ASA-21] Generate AI response from prompt. + """ + import time + + try: + manager = get_llm_config_manager() + client = manager.get_client() + + start_time = time.time() + response = await client.generate( + messages=[{"role": "user", "content": prompt}], + ) + latency_ms = (time.time() - start_time) * 1000 + + return AIResponse( + content=response.content, + prompt_tokens=response.usage.get("prompt_tokens", 0), + completion_tokens=response.usage.get("completion_tokens", 0), + total_tokens=response.usage.get("total_tokens", 0), + latency_ms=round(latency_ms, 2), + model=response.model, + ) + + except Exception as e: + logger.error(f"[AC-ASA-19] AI response generation failed: {e}") + return AIResponse( + content=f"AI 响应生成失败: {str(e)}", + latency_ms=0, + ) + + def _build_final_prompt(query: str, retrieval_results: list[dict]) -> str: """ Build the final prompt from query and retrieval results. @@ -138,14 +308,14 @@ def _build_final_prompt(query: str, retrieval_results: list[dict]) -> str: for i, hit in enumerate(retrieval_results[:5]) ]) - return f"""基于以下检索到的信息,回答用户问题: + return f"""基于以下检索到的信息,作为一个回答简洁精准的客服,回答用户问题: 用户问题:{query} 检索结果: {evidence_text} -请基于以上信息生成专业、准确的回答。""" +请基于以上信息生成专业、准确的回答,注意输出内容应该格式整齐,不包含json符号等。""" def _get_fallback_results(query: str) -> list[dict]: diff --git a/ai-service/app/main.py b/ai-service/app/main.py index 3e380fd..165e02a 100644 --- a/ai-service/app/main.py +++ b/ai-service/app/main.py @@ -12,7 +12,7 @@ from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import JSONResponse from app.api import chat_router, health_router -from app.api.admin import dashboard_router, embedding_router, kb_router, rag_router, sessions_router +from app.api.admin import dashboard_router, embedding_router, kb_router, llm_router, rag_router, sessions_router from app.core.config import get_settings from app.core.database import close_db, init_db from 
app.core.exceptions import ( @@ -115,6 +115,7 @@ app.include_router(chat_router) app.include_router(dashboard_router) app.include_router(embedding_router) app.include_router(kb_router) +app.include_router(llm_router) app.include_router(rag_router) app.include_router(sessions_router) diff --git a/ai-service/app/services/llm/factory.py b/ai-service/app/services/llm/factory.py new file mode 100644 index 0000000..fba2605 --- /dev/null +++ b/ai-service/app/services/llm/factory.py @@ -0,0 +1,332 @@ +""" +LLM Provider Factory and Configuration Management. +[AC-ASA-14, AC-ASA-15, AC-ASA-16, AC-ASA-17, AC-ASA-18] LLM provider management. + +Design pattern: Factory pattern for pluggable LLM providers. +""" + +import logging +from dataclasses import dataclass, field +from typing import Any + +from app.services.llm.base import LLMClient, LLMConfig +from app.services.llm.openai_client import OpenAIClient + +logger = logging.getLogger(__name__) + + +@dataclass +class LLMProviderInfo: + """Information about an LLM provider.""" + name: str + display_name: str + description: str + config_schema: dict[str, Any] + + +LLM_PROVIDERS: dict[str, LLMProviderInfo] = { + "openai": LLMProviderInfo( + name="openai", + display_name="OpenAI", + description="OpenAI GPT 系列模型 (GPT-4, GPT-3.5 等)", + config_schema={ + "api_key": { + "type": "string", + "description": "API Key", + "required": True, + "secret": True, + }, + "base_url": { + "type": "string", + "description": "API Base URL", + "default": "https://api.openai.com/v1", + }, + "model": { + "type": "string", + "description": "模型名称", + "default": "gpt-4o-mini", + }, + "max_tokens": { + "type": "integer", + "description": "最大输出 Token 数", + "default": 2048, + }, + "temperature": { + "type": "number", + "description": "温度参数 (0-2)", + "default": 0.7, + }, + }, + ), + "ollama": LLMProviderInfo( + name="ollama", + display_name="Ollama", + description="Ollama 本地模型 (Llama, Qwen 等)", + config_schema={ + "base_url": { + "type": "string", + 
"description": "Ollama API 地址", + "default": "http://localhost:11434/v1", + }, + "model": { + "type": "string", + "description": "模型名称", + "default": "llama3.2", + }, + "max_tokens": { + "type": "integer", + "description": "最大输出 Token 数", + "default": 2048, + }, + "temperature": { + "type": "number", + "description": "温度参数 (0-2)", + "default": 0.7, + }, + }, + ), + "azure": LLMProviderInfo( + name="azure", + display_name="Azure OpenAI", + description="Azure OpenAI 服务", + config_schema={ + "api_key": { + "type": "string", + "description": "API Key", + "required": True, + "secret": True, + }, + "base_url": { + "type": "string", + "description": "Azure Endpoint", + "required": True, + }, + "model": { + "type": "string", + "description": "部署名称", + "required": True, + }, + "api_version": { + "type": "string", + "description": "API 版本", + "default": "2024-02-15-preview", + }, + "max_tokens": { + "type": "integer", + "description": "最大输出 Token 数", + "default": 2048, + }, + "temperature": { + "type": "number", + "description": "温度参数 (0-2)", + "default": 0.7, + }, + }, + ), +} + + +class LLMProviderFactory: + """ + Factory for creating LLM clients. + [AC-ASA-14, AC-ASA-15] Dynamic provider creation. + """ + + @classmethod + def get_providers(cls) -> list[LLMProviderInfo]: + """Get all registered LLM providers.""" + return list(LLM_PROVIDERS.values()) + + @classmethod + def get_provider_info(cls, name: str) -> LLMProviderInfo | None: + """Get provider info by name.""" + return LLM_PROVIDERS.get(name) + + @classmethod + def create_client( + cls, + provider: str, + config: dict[str, Any], + ) -> LLMClient: + """ + Create an LLM client for the specified provider. + [AC-ASA-15] Factory method for client creation. 
+ + Args: + provider: Provider name (openai, ollama, azure) + config: Provider configuration + + Returns: + LLMClient instance + + Raises: + ValueError: If provider is not supported + """ + if provider not in LLM_PROVIDERS: + raise ValueError(f"Unsupported LLM provider: {provider}") + + if provider in ("openai", "ollama", "azure"): + return OpenAIClient( + api_key=config.get("api_key"), + base_url=config.get("base_url"), + model=config.get("model"), + default_config=LLMConfig( + model=config.get("model", "gpt-4o-mini"), + max_tokens=config.get("max_tokens", 2048), + temperature=config.get("temperature", 0.7), + ), + ) + + raise ValueError(f"Unsupported LLM provider: {provider}") + + +class LLMConfigManager: + """ + Manager for LLM configuration. + [AC-ASA-16, AC-ASA-17, AC-ASA-18] Configuration management with hot-reload. + """ + + def __init__(self): + self._current_provider: str = "openai" + self._current_config: dict[str, Any] = {} + self._client: LLMClient | None = None + + def get_current_config(self) -> dict[str, Any]: + """Get current LLM configuration.""" + return { + "provider": self._current_provider, + "config": self._current_config, + } + + async def update_config( + self, + provider: str, + config: dict[str, Any], + ) -> bool: + """ + Update LLM configuration. + [AC-ASA-16] Hot-reload configuration. 
+ + Args: + provider: Provider name + config: New configuration + + Returns: + True if update successful + """ + if provider not in LLM_PROVIDERS: + raise ValueError(f"Unsupported LLM provider: {provider}") + + provider_info = LLM_PROVIDERS[provider] + validated_config = self._validate_config(provider_info, config) + + if self._client: + await self._client.close() + self._client = None + + self._current_provider = provider + self._current_config = validated_config + + logger.info(f"[AC-ASA-16] LLM config updated: provider={provider}") + return True + + def _validate_config( + self, + provider_info: LLMProviderInfo, + config: dict[str, Any], + ) -> dict[str, Any]: + """Validate configuration against provider schema.""" + validated = {} + for key, schema in provider_info.config_schema.items(): + if key in config: + validated[key] = config[key] + elif "default" in schema: + validated[key] = schema["default"] + elif schema.get("required"): + raise ValueError(f"Missing required config: {key}") + return validated + + def get_client(self) -> LLMClient: + """Get or create LLM client with current config.""" + if self._client is None: + self._client = LLMProviderFactory.create_client( + self._current_provider, + self._current_config, + ) + return self._client + + async def test_connection( + self, + test_prompt: str = "你好,请简单介绍一下自己。", + provider: str | None = None, + config: dict[str, Any] | None = None, + ) -> dict[str, Any]: + """ + Test LLM connection. + [AC-ASA-17, AC-ASA-18] Connection testing. 
+ + Args: + test_prompt: Test prompt to send + provider: Optional provider to test (uses current if not specified) + config: Optional config to test (uses current if not specified) + + Returns: + Test result with success status, response, and metrics + """ + import time + + test_provider = provider or self._current_provider + test_config = config or self._current_config + + if test_provider not in LLM_PROVIDERS: + return { + "success": False, + "error": f"Unsupported provider: {test_provider}", + } + + try: + client = LLMProviderFactory.create_client(test_provider, test_config) + + start_time = time.time() + response = await client.generate( + messages=[{"role": "user", "content": test_prompt}], + ) + latency_ms = (time.time() - start_time) * 1000 + + await client.close() + + return { + "success": True, + "response": response.content, + "latency_ms": round(latency_ms, 2), + "prompt_tokens": response.usage.get("prompt_tokens", 0), + "completion_tokens": response.usage.get("completion_tokens", 0), + "total_tokens": response.usage.get("total_tokens", 0), + "model": response.model, + "message": f"连接成功,模型: {response.model}", + } + + except Exception as e: + logger.error(f"[AC-ASA-18] LLM test failed: {e}") + return { + "success": False, + "error": str(e), + "message": f"连接失败: {str(e)}", + } + + async def close(self) -> None: + """Close the current client.""" + if self._client: + await self._client.close() + self._client = None + + +_llm_config_manager: LLMConfigManager | None = None + + +def get_llm_config_manager() -> LLMConfigManager: + """Get or create LLM config manager instance.""" + global _llm_config_manager + if _llm_config_manager is None: + _llm_config_manager = LLMConfigManager() + return _llm_config_manager diff --git a/ai-service/pyproject.toml b/ai-service/pyproject.toml index 432330f..ae928af 100644 --- a/ai-service/pyproject.toml +++ b/ai-service/pyproject.toml @@ -16,6 +16,10 @@ dependencies = [ "asyncpg>=0.29.0", "qdrant-client>=1.7.0", 
"tiktoken>=0.5.0", + "openpyxl>=3.1.0", + "python-docx>=1.1.0", + "pymupdf>=1.23.0", + "pdfplumber>=0.10.0", ] [project.optional-dependencies] diff --git a/ai-service/scripts/check_qdrant.py b/ai-service/scripts/check_qdrant.py new file mode 100644 index 0000000..1612bf3 --- /dev/null +++ b/ai-service/scripts/check_qdrant.py @@ -0,0 +1,80 @@ +""" +Check Qdrant vector database contents - detailed view. +""" +import asyncio +import sys +sys.path.insert(0, ".") + +from qdrant_client import AsyncQdrantClient +from app.core.config import get_settings +from collections import defaultdict + +settings = get_settings() + + +async def check_qdrant(): + """Check Qdrant collections and vectors.""" + client = AsyncQdrantClient(url=settings.qdrant_url, check_compatibility=False) + + print(f"\n{'='*60}") + print(f"Qdrant URL: {settings.qdrant_url}") + print(f"{'='*60}\n") + + # List all collections + collections = await client.get_collections() + + # Check kb_default collection + for c in collections.collections: + if c.name == "kb_default": + print(f"\n--- Collection: {c.name} ---") + + # Get collection info + info = await client.get_collection(c.name) + print(f" Total vectors: {info.points_count}") + + # Scroll through all points and group by source + all_points = [] + offset = None + + while True: + points, offset = await client.scroll( + collection_name=c.name, + limit=100, + offset=offset, + with_payload=True, + with_vectors=False, + ) + all_points.extend(points) + if offset is None: + break + + # Group by source + by_source = defaultdict(list) + for p in all_points: + source = p.payload.get("source", "unknown") if p.payload else "unknown" + by_source[source].append(p) + + print(f"\n Documents by source:") + for source, points in by_source.items(): + print(f"\n Source: {source}") + print(f" Chunks: {len(points)}") + + # Check first chunk content + first_point = points[0] + text = first_point.payload.get("text", "") if first_point.payload else "" + + # Check if it's binary 
garbage or proper text + is_garbage = any(ord(c) > 0xFFFF or (ord(c) < 32 and c not in '\n\r\t') for c in text[:200]) + + if is_garbage: + print(f" Status: ❌ BINARY GARBAGE (parsing failed)") + else: + print(f" Status: ✅ PROPER TEXT (parsed correctly)") + + print(f" Preview: {text[:150]}...") + + await client.close() + + +if __name__ == "__main__": + asyncio.run(check_qdrant()) diff --git a/ai-service/scripts/cleanup_garbage.py b/ai-service/scripts/cleanup_garbage.py new file mode 100644 index 0000000..5948034 --- /dev/null +++ b/ai-service/scripts/cleanup_garbage.py @@ -0,0 +1,115 @@ +""" +Clean up garbage data from Qdrant vector database. +Removes vectors that contain binary garbage (failed parsing results). +""" +import asyncio +import sys +sys.path.insert(0, ".") + +from qdrant_client import AsyncQdrantClient +from qdrant_client.models import PointIdsList +from app.core.config import get_settings +from collections import defaultdict + +settings = get_settings() + + +def is_garbage_text(text: str) -> bool: + """Check if text contains binary garbage.""" + if not text: + return True + + sample = text[:500] + + garbage_chars = sum(1 for c in sample if ord(c) > 0xFFFF or (ord(c) < 32 and c not in '\n\r\t')) + + return garbage_chars > len(sample) * 0.1 + + +async def cleanup_garbage(): + """Clean up garbage data from Qdrant.""" + client = AsyncQdrantClient(url=settings.qdrant_url, check_compatibility=False) + + print(f"\n{'='*60}") + print(f"Cleaning up garbage data from Qdrant") + print(f"URL: {settings.qdrant_url}") + print(f"{'='*60}\n") + + collections = await client.get_collections() + + for c in collections.collections: + if not c.name.startswith(settings.qdrant_collection_prefix): + continue + + print(f"\n--- Collection: {c.name} ---") + + info = await client.get_collection(c.name) + print(f" Total vectors: {info.points_count}") + + all_points = [] + offset = None + + while True: + points, offset = await client.scroll( + collection_name=c.name, + limit=100, + 
offset=offset, + with_payload=True, + with_vectors=False, + ) + all_points.extend(points) + if offset is None: + break + + by_source = defaultdict(list) + for p in all_points: + source = p.payload.get("source", "unknown") if p.payload else "unknown" + by_source[source].append(p) + + garbage_sources = [] + good_sources = [] + + for source, points in by_source.items(): + first_point = points[0] + text = first_point.payload.get("text", "") if first_point.payload else "" + + if is_garbage_text(text): + garbage_sources.append((source, points)) + else: + good_sources.append((source, points)) + + print(f"\n Good documents: {len(good_sources)}") + print(f" Garbage documents: {len(garbage_sources)}") + + if garbage_sources: + print(f"\n Garbage documents to delete:") + for source, points in garbage_sources: + print(f" - {source} ({len(points)} chunks)") + preview = "" + if points[0].payload: + preview = points[0].payload.get("text", "")[:80] + print(f" Preview: {repr(preview)}...") + + confirm = input("\n Delete these garbage documents? (y/n): ") + + if confirm.lower() == 'y': + for source, points in garbage_sources: + point_ids = [p.id for p in points] + + await client.delete( + collection_name=c.name, + points_selector=PointIdsList(points=point_ids) + ) + print(f" Deleted {len(point_ids)} vectors for source {source}") + + print(f"\n Cleanup complete!") + else: + print(f"\n Cancelled.") + else: + print(f"\n No garbage data found.") + + await client.close() + + +if __name__ == "__main__": + asyncio.run(cleanup_garbage()) diff --git a/ai-service/scripts/test_excel_parse.py b/ai-service/scripts/test_excel_parse.py new file mode 100644 index 0000000..47bc983 --- /dev/null +++ b/ai-service/scripts/test_excel_parse.py @@ -0,0 +1,40 @@ +""" +Test Excel parsing directly. 
+""" +import sys +sys.path.insert(0, ".") + +from app.services.document import parse_document, get_supported_document_formats + +print("Supported formats:", get_supported_document_formats()) +print() + +# Test with a sample xlsx file if available +import os +from pathlib import Path + +# Find any xlsx files in the uploads directory +uploads_dir = Path("uploads") +if uploads_dir.exists(): + xlsx_files = list(uploads_dir.glob("**/*.xlsx")) + print(f"Found {len(xlsx_files)} xlsx files") + + for f in xlsx_files[:1]: # Test first one + print(f"\nTesting: {f}") + try: + result = parse_document(str(f)) + print(f" SUCCESS: chars={len(result.text)}") + print(f" metadata: {result.metadata}") + print(f" preview: {result.text[:500]}...") + except Exception as e: + print(f" FAILED: {type(e).__name__}: {e}") +else: + print("No uploads directory found") + +# Test openpyxl directly +print("\n--- Testing openpyxl directly ---") +try: + import openpyxl + print(f"openpyxl version: {openpyxl.__version__}") +except ImportError as e: + print(f"openpyxl NOT installed: {e}") diff --git a/docs/progress/ai-service-admin-progress.md b/docs/progress/ai-service-admin-progress.md index 3e9c271..43b074d 100644 --- a/docs/progress/ai-service-admin-progress.md +++ b/docs/progress/ai-service-admin-progress.md @@ -3,7 +3,8 @@ module: ai-service-admin feature: ASA status: in_progress created: 2026-02-24 -last_updated: 2026-02-24 +last_updated: "2026-02-24" +version: "0.3.0" --- # AI 中台管理界面(ai-service-admin)进度文档 @@ -28,41 +29,60 @@ last_updated: 2026-02-24 - [x] Phase 3: RAG 实验室 (100%) [P3-01 ~ P3-04] - [x] Phase 4: 会话监控与详情 (100%) [P4-01 ~ P4-03] - [x] Phase 5: 后端管理接口实现 (100%) [Backend Admin APIs] +- [ ] Phase 6: 嵌入模型管理 (0%) [P5-01 ~ P5-08] +- [ ] Phase 7: LLM 配置与 RAG 调试输出 (0%) [P6-01 ~ P6-10] 🔄当前 ## current_phase -**goal**: 知识库管理模块开发,实现文档上传、列表展示与状态轮询 +**goal**: 实现 LLM 模型配置页面及 RAG 实验室 AI 输出调试功能 ### sub_tasks -- [x] (P1-01) 初始化 `ai-service-admin` 前端工程(Vue 3 + Element Plus + RuoYi-Vue 
基座对齐),落地基础目录结构与路由骨架 -- [x] (P1-02) 接入 Pinia:实现 `tenant` store(`currentTenantId`)并持久化(localStorage),提供切换租户能力 -- [x] (P1-03) Axios/SDK 请求层封装:创建统一 `request` 实例,自动注入必填 Header `X-Tenant-Id` -- [x] (P1-04) 全局异常拦截:实现 401/403 响应拦截策略 -- [x] (P1-05) 基础组件封装:`BaseTable`、`BaseForm` 并给出示例页 -- [x] (P2-01) 创建 `openapi.deps.yaml` 明确依赖契约 (L1) [AC-ASA-08] -- [x] (P2-02) 实现知识库列表 API 对接及分页展示 [AC-ASA-08] -- [x] (P2-03) 实现文档上传功能(Multipart/form-data)[AC-ASA-01] -- [x] (P2-04) 实现索引任务状态轮询机制(3s 间隔)[AC-ASA-02] -- [x] (P2-05) 失败任务错误详情弹窗展示 [AC-ASA-02] -- [x] (P5-01) 实现后端 GET /admin/kb/documents 文档列表接口 [AC-ASA-08] -- [x] (P5-02) 实现后端 POST /admin/kb/documents 文档上传接口 [AC-ASA-01] -- [x] (P5-03) 实现后端 GET /admin/kb/index/jobs/{jobId} 索引任务查询接口 [AC-ASA-02] -- [x] (P5-04) 实现后端 POST /admin/rag/experiments/run RAG实验接口 [AC-ASA-05] -- [x] (P5-05) 实现后端 GET /admin/sessions 会话列表接口 [AC-ASA-09] -- [x] (P5-06) 实现后端 GET /admin/sessions/{sessionId} 会话详情接口 [AC-ASA-07] +#### Phase 1-5 已完成 +- [x] (P1-01) 初始化前端工程 +- [x] (P1-02) 接入 Pinia tenant store +- [x] (P1-03) Axios 请求层封装 +- [x] (P1-04) 全局异常拦截 +- [x] (P1-05) 基础组件封装 +- [x] (P2-01~P2-05) 知识库管理功能 +- [x] (P3-01~P3-04) RAG 实验室功能 +- [x] (P4-01~P4-03) 会话监控功能 +- [x] (P5-01~P5-06) 后端管理接口实现 + +#### Phase 6: 嵌入模型管理(待处理) +- [ ] (P5-01) API 服务层与类型定义 [AC-ASA-08, AC-ASA-09] +- [ ] (P5-02) 提供者选择组件 [AC-ASA-09] +- [ ] (P5-03) 动态配置表单 [AC-ASA-09, AC-ASA-10] +- [ ] (P5-04) 测试连接组件 [AC-ASA-11, AC-ASA-12] +- [ ] (P5-05) 支持格式组件 [AC-ASA-13] +- [ ] (P5-06) 页面骨架与路由 [AC-ASA-08] +- [ ] (P5-07) 配置加载与保存 [AC-ASA-08, AC-ASA-10] +- [ ] (P5-08) 组件整合与测试 [AC-ASA-08~AC-ASA-13] + +#### Phase 7: LLM 配置与 RAG 调试输出(当前) +- [ ] (P6-01) LLM API 服务层与类型定义:创建 src/api/llm.ts 和 src/types/llm.ts [AC-ASA-14, AC-ASA-15] +- [ ] (P6-02) LLM 提供者选择组件:创建 LLMProviderSelect.vue [AC-ASA-15] +- [ ] (P6-03) LLM 动态配置表单:创建 LLMConfigForm.vue [AC-ASA-15, AC-ASA-16] +- [ ] (P6-04) LLM 测试连接组件:创建 LLMTestPanel.vue [AC-ASA-17, AC-ASA-18] +- [ ] (P6-05) LLM 配置页面:创建 /admin/llm 页面 [AC-ASA-14, AC-ASA-16] +- [ ] (P6-06) AI 回复展示组件:创建 
AIResponseViewer.vue [AC-ASA-19] +- [ ] (P6-07) 流式输出支持:实现 SSE 流式输出展示 [AC-ASA-20] +- [ ] (P6-08) Token 统计展示:展示 Token 消耗、响应耗时 [AC-ASA-21] +- [ ] (P6-09) LLM 选择器:在 RAG 实验室中添加 LLM 配置选择器 [AC-ASA-22] +- [ ] (P6-10) RAG 实验室整合:将 AI 输出组件整合到 RAG 实验室 [AC-ASA-19~AC-ASA-22] ### next_action -**immediate**: 后端管理接口已实现完成,等待前端联调 +**immediate**: 并行启动 3 个窗口执行 Phase 6 和 Phase 7 任务 **details**: -- file: "ai-service/app/api/admin/" -- action: "后端 7 个管理接口已全部实现,包含 Mock 数据返回,支持前端并行开发" -- reference: "spec/ai-service/openapi.admin.yaml" +- file: "ai-service-admin/src/" +- action: "窗口1: 嵌入管理组件; 窗口2: LLM 配置组件; 窗口3: RAG 实验室增强" +- reference: "spec/ai-service-admin/openapi.deps.yaml" - constraints: - - 所有接口均已实现 X-Tenant-Id Header 校验 - - 返回数据格式与契约定义一致 + - 每个任务必须包含 AC 标记 + - 完成后更新 spec/ai-service-admin/tasks.md + - commit message 格式: `feat(ASA-P6/P7): [AC-ASA-XX]` ### backend_implementation_summary @@ -147,6 +167,30 @@ export const useTenantStore = defineStore('tenant', { - ai-service/app/main.py - 注册管理路由 - docs/progress/ai-service-admin-progress.md - 更新进度 +- session: "Session #3 (2026-02-24) - 嵌入模型管理需求规划" + completed: + - 更新 spec/ai-service-admin/requirements.md 添加 v0.2.0 迭代需求 + - 更新 spec/ai-service-admin/tasks.md 添加 Phase 5 任务 + - 更新 spec/ai-service-admin/openapi.deps.yaml 添加嵌入管理接口 + - 更新进度文档添加 Phase 6 任务 + changes: + - spec/ai-service-admin/requirements.md - 新增 AC-ASA-08~AC-ASA-13 + - spec/ai-service-admin/tasks.md - 新增 P5-01~P5-08 任务 + - spec/ai-service-admin/openapi.deps.yaml - 完整重写,添加嵌入管理接口 + - docs/progress/ai-service-admin-progress.md - 添加 Phase 6 + +- session: "Session #4 (2026-02-24) - LLM 配置与 RAG 调试输出需求规划" + completed: + - 更新 spec/ai-service-admin/requirements.md 添加 v0.3.0 迭代需求 + - 更新 spec/ai-service-admin/tasks.md 添加 Phase 6 任务 + - 更新 spec/ai-service-admin/openapi.deps.yaml 添加 LLM 管理和 RAG 实验增强接口 + - 更新进度文档添加 Phase 7 任务 + changes: + - spec/ai-service-admin/requirements.md - 新增 AC-ASA-14~AC-ASA-22 + - spec/ai-service-admin/tasks.md - 新增 P6-01~P6-10 任务 + - 
spec/ai-service-admin/openapi.deps.yaml - 添加 LLM 配置接口和 RAG 实验增强接口 + - docs/progress/ai-service-admin-progress.md - 添加 Phase 7 + ## startup_guide 1. **Step 1**: 读取本进度文档(了解当前位置与下一步) @@ -159,9 +203,12 @@ export const useTenantStore = defineStore('tenant', { | Phase | 名称 | 任务数 | 状态 | |-------|------|--------|------| -| Phase 1 | 基础建设 | 5 | ⏳ 待开始 | -| Phase 2 | 知识库管理 | 5 | ⏳ 待开始 | -| Phase 3 | RAG 实验室 | 4 | ⏳ 待开始 | -| Phase 4 | 会话监控与详情 | 3 | ⏳ 待开始 | +| Phase 1 | 基础建设 | 5 | ✅ 完成 | +| Phase 2 | 知识库管理 | 5 | ✅ 完成 | +| Phase 3 | RAG 实验室 | 4 | ✅ 完成 | +| Phase 4 | 会话监控与详情 | 3 | ✅ 完成 | +| Phase 5 | 后端管理接口实现 | 6 | ✅ 完成 | +| Phase 6 | 嵌入模型管理 | 8 | ⏳ 待处理 | +| Phase 7 | LLM 配置与 RAG 调试输出 | 10 | 🔄 进行中 | -**总计: 17 个任务** +**总计: 41 个任务 | 已完成: 23 个 | 待处理: 8 个 | 进行中: 10 个** diff --git a/spec/ai-service-admin/openapi.deps.yaml b/spec/ai-service-admin/openapi.deps.yaml index feb5ea5..a763269 100644 --- a/spec/ai-service-admin/openapi.deps.yaml +++ b/spec/ai-service-admin/openapi.deps.yaml @@ -1,6 +1,592 @@ -openapi: 3.1.0 -info: - title: \" AI Service Admin "Dependencies\ - description: \ai-service-admin" 模块依赖的外�?API 契约(Consumer "需求侧)\ - version: \0.1.0\ - x-contract-level: L1 +openapi: 3.1.0 +info: + title: "AI Service Admin Dependencies" + description: "ai-service-admin 模块依赖的外部 API 契约(Consumer 需求侧)" + version: "0.3.0" + x-contract-level: L1 + +servers: + - url: http://localhost:8000 + description: 本地开发服务器 + +paths: + /admin/embedding/providers: + get: + operationId: listEmbeddingProviders + summary: 获取可用的嵌入模型提供者列表 + tags: + - Embedding Management + parameters: + - name: X-Tenant-Id + in: header + required: true + schema: + type: string + responses: + '200': + description: 成功返回提供者列表 + content: + application/json: + schema: + type: object + properties: + providers: + type: array + items: + $ref: '#/components/schemas/EmbeddingProviderInfo' + + /admin/embedding/config: + get: + operationId: getEmbeddingConfig + summary: 获取当前嵌入模型配置 + tags: + - Embedding Management + parameters: + - name: 
X-Tenant-Id + in: header + required: true + schema: + type: string + responses: + '200': + description: 成功返回当前配置 + content: + application/json: + schema: + $ref: '#/components/schemas/EmbeddingConfig' + put: + operationId: updateEmbeddingConfig + summary: 更新嵌入模型配置 + tags: + - Embedding Management + parameters: + - name: X-Tenant-Id + in: header + required: true + schema: + type: string + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/EmbeddingConfigUpdate' + responses: + '200': + description: 配置更新成功 + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + message: + type: string + + /admin/embedding/test: + post: + operationId: testEmbedding + summary: 测试嵌入模型连接 + tags: + - Embedding Management + parameters: + - name: X-Tenant-Id + in: header + required: true + schema: + type: string + requestBody: + required: false + content: + application/json: + schema: + type: object + properties: + test_text: + type: string + description: 测试文本(可选) + config: + $ref: '#/components/schemas/EmbeddingConfigUpdate' + responses: + '200': + description: 测试成功 + content: + application/json: + schema: + $ref: '#/components/schemas/EmbeddingTestResult' + + /admin/embedding/formats: + get: + operationId: getSupportedFormats + summary: 获取支持的文档格式列表 + tags: + - Embedding Management + parameters: + - name: X-Tenant-Id + in: header + required: true + schema: + type: string + responses: + '200': + description: 成功返回支持格式列表 + content: + application/json: + schema: + type: object + properties: + formats: + type: array + items: + $ref: '#/components/schemas/DocumentFormat' + + /admin/llm/providers: + get: + operationId: listLLMProviders + summary: 获取可用的 LLM 提供者列表 + tags: + - LLM Management + parameters: + - name: X-Tenant-Id + in: header + required: true + schema: + type: string + responses: + '200': + description: 成功返回提供者列表 + content: + application/json: + schema: + type: object + properties: + providers: 
+ type: array + items: + $ref: '#/components/schemas/LLMProviderInfo' + + /admin/llm/config: + get: + operationId: getLLMConfig + summary: 获取当前 LLM 配置 + tags: + - LLM Management + parameters: + - name: X-Tenant-Id + in: header + required: true + schema: + type: string + responses: + '200': + description: 成功返回当前配置 + content: + application/json: + schema: + $ref: '#/components/schemas/LLMConfig' + put: + operationId: updateLLMConfig + summary: 更新 LLM 配置 + tags: + - LLM Management + parameters: + - name: X-Tenant-Id + in: header + required: true + schema: + type: string + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/LLMConfigUpdate' + responses: + '200': + description: 配置更新成功 + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + message: + type: string + + /admin/llm/test: + post: + operationId: testLLM + summary: 测试 LLM 连接 + tags: + - LLM Management + parameters: + - name: X-Tenant-Id + in: header + required: true + schema: + type: string + requestBody: + required: false + content: + application/json: + schema: + type: object + properties: + test_prompt: + type: string + description: 测试提示词(可选) + example: "你好,请简单介绍一下自己。" + config: + $ref: '#/components/schemas/LLMConfigUpdate' + responses: + '200': + description: 测试成功 + content: + application/json: + schema: + $ref: '#/components/schemas/LLMTestResult' + + /admin/rag/experiments/run: + post: + operationId: runRagExperiment + summary: 运行 RAG 实验(含 AI 输出) + tags: + - RAG Lab + parameters: + - name: X-Tenant-Id + in: header + required: true + schema: + type: string + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/RagExperimentRequest' + responses: + '200': + description: 实验完成 + content: + application/json: + schema: + $ref: '#/components/schemas/RagExperimentResult' + + /admin/rag/experiments/stream: + post: + operationId: runRagExperimentStream + summary: 运行 RAG 
实验(流式输出) + tags: + - RAG Lab + parameters: + - name: X-Tenant-Id + in: header + required: true + schema: + type: string + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/RagExperimentRequest' + responses: + '200': + description: SSE 流式输出 + content: + text/event-stream: + schema: + type: string + +components: + schemas: + EmbeddingProviderInfo: + type: object + required: + - name + - display_name + - config_schema + properties: + name: + type: string + description: 提供者唯一标识 + example: "ollama" + display_name: + type: string + description: 提供者显示名称 + example: "Ollama 本地模型" + description: + type: string + description: 提供者描述 + example: "使用 Ollama 运行的本地嵌入模型" + config_schema: + type: object + description: 配置参数定义(JSON Schema 格式) + additionalProperties: true + + EmbeddingConfig: + type: object + required: + - provider + - config + properties: + provider: + type: string + description: 当前激活的提供者 + example: "ollama" + config: + type: object + description: 提供者配置参数 + additionalProperties: true + updated_at: + type: string + format: date-time + description: 配置最后更新时间 + + EmbeddingConfigUpdate: + type: object + required: + - provider + properties: + provider: + type: string + description: 提供者标识 + example: "ollama" + config: + type: object + description: 提供者配置参数 + additionalProperties: true + + EmbeddingTestResult: + type: object + required: + - success + - dimension + properties: + success: + type: boolean + description: 测试是否成功 + dimension: + type: integer + description: 返回的向量维度 + example: 768 + latency_ms: + type: number + description: 响应延迟(毫秒) + example: 125.5 + message: + type: string + description: 测试结果消息 + example: "连接成功,向量维度: 768" + error: + type: string + description: 错误信息(失败时) + example: "连接超时" + + DocumentFormat: + type: object + required: + - extension + - name + properties: + extension: + type: string + description: 文件扩展名 + example: ".pdf" + name: + type: string + description: 格式名称 + example: "PDF 文档" + description: + 
type: string + description: 格式描述 + example: "使用 PyMuPDF 解析 PDF 文档" + + LLMProviderInfo: + type: object + required: + - name + - display_name + - config_schema + properties: + name: + type: string + description: 提供者唯一标识 + example: "openai" + display_name: + type: string + description: 提供者显示名称 + example: "OpenAI" + description: + type: string + description: 提供者描述 + example: "OpenAI GPT 系列模型" + config_schema: + type: object + description: 配置参数定义(JSON Schema 格式) + additionalProperties: true + + LLMConfig: + type: object + required: + - provider + - config + properties: + provider: + type: string + description: 当前激活的提供者 + example: "openai" + config: + type: object + description: 提供者配置参数 + additionalProperties: true + example: + api_key: "sk-xxx" + base_url: "https://api.openai.com/v1" + model: "gpt-4o-mini" + updated_at: + type: string + format: date-time + description: 配置最后更新时间 + + LLMConfigUpdate: + type: object + required: + - provider + properties: + provider: + type: string + description: 提供者标识 + example: "openai" + config: + type: object + description: 提供者配置参数 + additionalProperties: true + + LLMTestResult: + type: object + required: + - success + properties: + success: + type: boolean + description: 测试是否成功 + response: + type: string + description: LLM 响应内容 + example: "你好!我是一个 AI 助手..." + latency_ms: + type: number + description: 响应延迟(毫秒) + example: 1250.5 + prompt_tokens: + type: integer + description: 输入 Token 数 + example: 15 + completion_tokens: + type: integer + description: 输出 Token 数 + example: 50 + total_tokens: + type: integer + description: 总 Token 数 + example: 65 + message: + type: string + description: 测试结果消息 + example: "连接成功" + error: + type: string + description: 错误信息(失败时) + example: "API Key 无效" + + RagExperimentRequest: + type: object + required: + - query + properties: + query: + type: string + description: 查询文本 + example: "什么是 RAG?" 
+ kb_ids: + type: array + items: + type: string + description: 知识库 ID 列表 + top_k: + type: integer + description: 检索数量 + default: 5 + score_threshold: + type: number + description: 相似度阈值 + default: 0.5 + llm_provider: + type: string + description: 指定 LLM 提供者(可选) + example: "openai" + generate_response: + type: boolean + description: 是否生成 AI 回复 + default: true + + RagExperimentResult: + type: object + properties: + query: + type: string + description: 原始查询 + retrieval_results: + type: array + items: + $ref: '#/components/schemas/RetrievalResult' + final_prompt: + type: string + description: 最终拼接的 Prompt + ai_response: + $ref: '#/components/schemas/AIResponse' + total_latency_ms: + type: number + description: 总耗时(毫秒) + + RetrievalResult: + type: object + properties: + content: + type: string + description: 检索到的内容 + score: + type: number + description: 相似度分数 + source: + type: string + description: 来源文档 + metadata: + type: object + additionalProperties: true + description: 元数据 + + AIResponse: + type: object + properties: + content: + type: string + description: AI 回复内容 + prompt_tokens: + type: integer + description: 输入 Token 数 + completion_tokens: + type: integer + description: 输出 Token 数 + total_tokens: + type: integer + description: 总 Token 数 + latency_ms: + type: number + description: 生成耗时(毫秒) + model: + type: string + description: 使用的模型 diff --git a/spec/ai-service-admin/requirements.md b/spec/ai-service-admin/requirements.md index f6f2f2c..25df27a 100644 --- a/spec/ai-service-admin/requirements.md +++ b/spec/ai-service-admin/requirements.md @@ -2,7 +2,7 @@ feature_id: "ASA" title: "AI 中台管理界面(ai-service-admin)需求规范" status: "draft" -version: "0.1.0" +version: "0.3.0" owners: - "product" - "frontend" @@ -70,3 +70,92 @@ source: | AC-ASA-03 | /admin/config/prompt-templates/{tplId}/publish | POST | 发布指定版本 | | AC-ASA-05 | /admin/rag/experiments/run | POST | 触发调试实验 | | AC-ASA-07 | /admin/sessions/{sessionId} | GET | 获取全链路详情 | + +--- + +## 7. 
迭代需求:嵌入模型管理(v0.2.0) + +> 说明:本节为 v0.2.0 迭代新增,用于支持嵌入模型的界面配置与管理。 + +### 7.1 嵌入模型配置管理 + +- [AC-ASA-08] WHEN 用户访问嵌入模型配置页面 THEN 系统 SHALL 展示当前激活的嵌入模型提供者及其配置参数。 + +- [AC-ASA-09] WHEN 用户切换嵌入模型提供者 THEN 系统 SHALL 动态展示该提供者的配置参数表单,并保留当前配置值。 + +- [AC-ASA-10] WHEN 用户修改嵌入模型配置并保存 THEN 系统 SHALL 验证配置有效性,更新配置并提示操作结果。 + +- [AC-ASA-11] WHEN 用户点击"测试连接"按钮 THEN 系统 SHALL 调用嵌入模型生成测试向量,展示连接状态、向量维度和响应延迟。 + +- [AC-ASA-12] WHEN 嵌入模型连接测试失败 THEN 系统 SHALL 展示详细错误信息,帮助用户排查配置问题。 + +### 7.2 文档格式支持展示 + +- [AC-ASA-13] WHEN 用户查看嵌入模型配置页面 THEN 系统 SHALL 展示当前支持的文档格式列表(PDF、Word、Excel、TXT 等)。 + +### 7.3 用户故事(迭代追加) + +- [US-ASA-06] 作为系统管理员,我希望在界面上配置和切换嵌入模型,以便快速适配不同的业务场景而无需修改代码。 + +- [US-ASA-07] 作为系统管理员,我希望在保存配置前测试嵌入模型连接,以便确保配置正确后再正式启用。 + +### 7.4 追踪映射(迭代追加) + +| AC ID | Endpoint | 方法 | 备注 | +|------|----------|------|-----| +| AC-ASA-08 | /admin/embedding/config | GET | 获取当前配置 | +| AC-ASA-09 | /admin/embedding/providers | GET | 获取提供者列表及配置定义 | +| AC-ASA-10 | /admin/embedding/config | PUT | 更新配置 | +| AC-ASA-11 | /admin/embedding/test | POST | 测试连接 | +| AC-ASA-12 | /admin/embedding/test | POST | 测试失败错误展示 | +| AC-ASA-13 | /admin/embedding/formats | GET | 获取支持格式 | + +--- + +## 8. 
迭代需求:LLM 模型配置与 RAG 调试输出(v0.3.0) + +> 说明:本节为 v0.3.0 迭代新增,用于支持 LLM 模型的界面配置及 RAG 实验室的 AI 输出调试。 + +### 8.1 LLM 模型配置管理 + +- [AC-ASA-14] WHEN 用户访问 LLM 模型配置页面 THEN 系统 SHALL 展示当前激活的 LLM 提供者及其配置参数(API Key、Base URL、模型名称等)。 + +- [AC-ASA-15] WHEN 用户切换 LLM 提供者 THEN 系统 SHALL 动态展示该提供者的配置参数表单,并保留当前配置值。 + +- [AC-ASA-16] WHEN 用户修改 LLM 模型配置并保存 THEN 系统 SHALL 验证配置有效性,更新配置并提示操作结果。 + +- [AC-ASA-17] WHEN 用户点击"测试连接"按钮 THEN 系统 SHALL 调用 LLM 生成测试回复,展示连接状态、模型响应和耗时。 + +- [AC-ASA-18] WHEN LLM 连接测试失败 THEN 系统 SHALL 展示详细错误信息,帮助用户排查配置问题。 + +### 8.2 RAG 实验室 AI 输出展示 + +- [AC-ASA-19] WHEN 用户运行 RAG 实验后 THEN 系统 SHALL 在结果区域新增"AI 回复"展示区,显示基于检索结果生成的 AI 最终输出。 + +- [AC-ASA-20] WHEN AI 回复生成中 THEN 系统 SHALL 展示 Loading 状态,支持流式输出展示(SSE)。 + +- [AC-ASA-21] WHEN AI 回复生成完成 THEN 系统 SHALL 展示完整的回复内容、Token 消耗统计、响应耗时。 + +- [AC-ASA-22] WHEN 用户选择不同的 LLM 配置 THEN 系统 SHALL 使用选定的 LLM 模型生成回复,便于对比不同模型效果。 + +### 8.3 用户故事(迭代追加) + +- [US-ASA-08] 作为系统管理员,我希望在界面上配置和切换不同的 LLM 提供者(如 OpenAI、Ollama、Azure 等),以便快速适配不同的业务场景。 + +- [US-ASA-09] 作为 AI 开发者,我希望在 RAG 实验室中看到 AI 的最终输出,以便完整调试 RAG 链路效果,而不仅仅是检索结果。 + +- [US-ASA-10] 作为 Prompt 工程师,我希望对比不同 LLM 模型在相同检索结果下的回复效果,以便选择最适合业务场景的模型。 + +### 8.4 追踪映射(迭代追加) + +| AC ID | Endpoint | 方法 | 备注 | +|------|----------|------|-----| +| AC-ASA-14 | /admin/llm/config | GET | 获取当前 LLM 配置 | +| AC-ASA-15 | /admin/llm/providers | GET | 获取 LLM 提供者列表 | +| AC-ASA-16 | /admin/llm/config | PUT | 更新 LLM 配置 | +| AC-ASA-17 | /admin/llm/test | POST | 测试 LLM 连接 | +| AC-ASA-18 | /admin/llm/test | POST | LLM 测试失败错误展示 | +| AC-ASA-19 | /admin/rag/experiments/run | POST | RAG 实验增加 AI 输出 | +| AC-ASA-20 | /admin/rag/experiments/stream | POST | RAG 实验流式输出(SSE) | +| AC-ASA-21 | /admin/rag/experiments/run | POST | Token 统计与耗时 | +| AC-ASA-22 | /admin/rag/experiments/run | POST | 支持指定 LLM 配置 | diff --git a/spec/ai-service-admin/tasks.md b/spec/ai-service-admin/tasks.md index 08069e7..d274e43 100644 --- a/spec/ai-service-admin/tasks.md +++ b/spec/ai-service-admin/tasks.md @@ -123,29 +123,29 @@ principles: > 页面导向:嵌入模型配置页面,支持提供者切换、参数配置、连接测试。 
-- [ ] (P5-01) 嵌入模型配置页面骨架:创建 `/admin/embedding` 路由,布局包含提供者选择区、配置表单区、测试连接区、支持格式展示区。 - - AC: [AC-ASA-08] +- [ ] (P5-01) API 服务层与类型定义:创建 src/api/embedding.ts 和 src/types/embedding.ts + - AC: [AC-ASA-08, AC-ASA-09] -- [x] (P5-02) 提供者选择组件:实现 `EmbeddingProviderSelect` 下拉组件,对接 `/admin/embedding/providers`,展示提供者列表(name、display_name、description)。 +- [ ] (P5-02) 提供者选择组件:实现 `EmbeddingProviderSelect` 下拉组件,对接 `/admin/embedding/providers` - AC: [AC-ASA-09] -- [x] (P5-03) 动态配置表单:根据选中提供者的 `config_schema` 动态渲染配置表单(支持 string、integer、number 类型),实现表单校验。 +- [ ] (P5-03) 动态配置表单:根据 `config_schema` 动态渲染配置表单,实现表单校验 - AC: [AC-ASA-09, AC-ASA-10] -- [ ] (P5-04) 当前配置加载:页面初始化时调用 `/admin/embedding/config` 获取当前配置,填充表单默认值。 +- [ ] (P5-04) 测试连接组件:实现 `EmbeddingTestPanel`,展示测试结果和错误信息 + - AC: [AC-ASA-11, AC-ASA-12] + +- [ ] (P5-05) 支持格式组件:实现 `SupportedFormats`,展示支持的文档格式列表 + - AC: [AC-ASA-13] + +- [ ] (P5-06) 页面骨架与路由:创建 `/admin/embedding` 页面,布局包含各功能区 - AC: [AC-ASA-08] -- [ ] (P5-05) 配置保存功能:实现保存按钮,调用 `PUT /admin/embedding/config`,处理成功/失败响应并提示用户。 - - AC: [AC-ASA-10] +- [ ] (P5-07) 配置加载与保存:实现配置加载、保存逻辑 + - AC: [AC-ASA-08, AC-ASA-10] -- [x] (P5-06) 测试连接功能:实现测试按钮,调用 `POST /admin/embedding/test`,展示测试结果(success、dimension、latency_ms、message)。 - - AC: [AC-ASA-11] - -- [x] (P5-07) 测试失败错误展示:测试失败时展示详细错误信息(error 字段),并提供排查建议。 - - AC: [AC-ASA-12] - -- [ ] (P5-08) 支持格式展示:调用 `/admin/embedding/formats` 获取支持的文档格式列表,以标签或卡片形式展示。 - - AC: [AC-ASA-13] +- [ ] (P5-08) 组件整合与测试:整合所有组件完成功能闭环 + - AC: [AC-ASA-08~AC-ASA-13] --- @@ -153,11 +153,68 @@ principles: | 任务 | 描述 | 状态 | |------|------|------| -| P5-01 | 嵌入模型配置页面骨架 | ⏳ 待处理 | -| P5-02 | 提供者选择组件 | ✅ 已完成 | -| P5-03 | 动态配置表单 | ✅ 已完成 | -| P5-04 | 当前配置加载 | ⏳ 待处理 | -| P5-05 | 配置保存功能 | ⏳ 待处理 | -| P5-06 | 测试连接功能 | ✅ 已完成 | -| P5-07 | 测试失败错误展示 | ✅ 已完成 | -| P5-08 | 支持格式展示 | ⏳ 待处理 | +| P5-01 | API 服务层与类型定义 | ⏳ 待处理 | +| P5-02 | 提供者选择组件 | ⏳ 待处理 | +| P5-03 | 动态配置表单 | ⏳ 待处理 | +| P5-04 | 测试连接组件 | ⏳ 待处理 | +| P5-05 | 支持格式组件 | ⏳ 待处理 | +| P5-06 | 页面骨架与路由 | ⏳ 待处理 | +| P5-07 | 配置加载与保存 | ⏳ 待处理 | +| P5-08 
| 组件整合与测试 | ⏳ 待处理 | + +--- + +## Phase 6: LLM 模型配置与 RAG 调试输出(v0.3.0) + +> 页面导向:LLM 模型配置页面 + RAG 实验室 AI 输出增强。 + +### 6.1 LLM 模型配置 + +- [ ] (P6-01) LLM API 服务层与类型定义:创建 src/api/llm.ts 和 src/types/llm.ts + - AC: [AC-ASA-14, AC-ASA-15] + +- [ ] (P6-02) LLM 提供者选择组件:实现 `LLMProviderSelect` 下拉组件 + - AC: [AC-ASA-15] + +- [ ] (P6-03) LLM 动态配置表单:根据 `config_schema` 动态渲染配置表单 + - AC: [AC-ASA-15, AC-ASA-16] + +- [ ] (P6-04) LLM 测试连接组件:实现 `LLMTestPanel`,展示测试回复和耗时 + - AC: [AC-ASA-17, AC-ASA-18] + +- [ ] (P6-05) LLM 配置页面:创建 `/admin/llm` 页面,整合所有组件 + - AC: [AC-ASA-14, AC-ASA-16] + +### 6.2 RAG 实验室 AI 输出增强 + +- [ ] (P6-06) AI 回复展示组件:实现 `AIResponseViewer`,展示 AI 最终输出 + - AC: [AC-ASA-19] + +- [ ] (P6-07) 流式输出支持:实现 SSE 流式输出展示,支持实时显示 AI 回复 + - AC: [AC-ASA-20] + +- [ ] (P6-08) Token 统计展示:展示 Token 消耗、响应耗时等统计信息 + - AC: [AC-ASA-21] + +- [ ] (P6-09) LLM 选择器:在 RAG 实验室中添加 LLM 配置选择器 + - AC: [AC-ASA-22] + +- [ ] (P6-10) RAG 实验室整合:将 AI 输出组件整合到 RAG 实验室页面 + - AC: [AC-ASA-19~AC-ASA-22] + +--- + +## Phase 6 任务进度追踪 + +| 任务 | 描述 | 状态 | +|------|------|------| +| P6-01 | LLM API 服务层与类型定义 | ⏳ 待处理 | +| P6-02 | LLM 提供者选择组件 | ⏳ 待处理 | +| P6-03 | LLM 动态配置表单 | ⏳ 待处理 | +| P6-04 | LLM 测试连接组件 | ⏳ 待处理 | +| P6-05 | LLM 配置页面 | ⏳ 待处理 | +| P6-06 | AI 回复展示组件 | ⏳ 待处理 | +| P6-07 | 流式输出支持 | ⏳ 待处理 | +| P6-08 | Token 统计展示 | ⏳ 待处理 | +| P6-09 | LLM 选择器 | ⏳ 待处理 | +| P6-10 | RAG 实验室整合 | ⏳ 待处理 | diff --git a/spec/ai-service/openapi.provider.yaml b/spec/ai-service/openapi.provider.yaml index 9db455d..a5c3055 100644 --- a/spec/ai-service/openapi.provider.yaml +++ b/spec/ai-service/openapi.provider.yaml @@ -25,6 +25,10 @@ tags: description: 健康检查 - name: Embedding Management description: 嵌入模型管理 + - name: LLM Management + description: LLM 模型管理 + - name: RAG Lab + description: RAG 实验室 paths: /ai/chat: @@ -536,3 +540,428 @@ components: type: string description: 错误信息(失败时) example: "连接超时" + + LLMProviderInfo: + type: object + description: LLM 提供者信息 + required: + - name + - display_name + - config_schema + properties: + name: + type: string + 
description: 提供者唯一标识 + example: "openai" + display_name: + type: string + description: 提供者显示名称 + example: "OpenAI" + description: + type: string + description: 提供者描述 + example: "OpenAI GPT 系列模型" + config_schema: + type: object + description: 配置参数定义(JSON Schema 格式) + additionalProperties: true + + LLMConfig: + type: object + description: 当前 LLM 配置 + required: + - provider + - config + properties: + provider: + type: string + description: 当前激活的提供者 + example: "openai" + config: + type: object + description: 提供者配置参数(敏感字段已脱敏) + additionalProperties: true + example: + api_key: "sk-***" + base_url: "https://api.openai.com/v1" + model: "gpt-4o-mini" + updated_at: + type: string + format: date-time + description: 配置最后更新时间 + + LLMConfigUpdate: + type: object + description: LLM 配置更新请求 + required: + - provider + properties: + provider: + type: string + description: 提供者标识 + example: "openai" + config: + type: object + description: 提供者配置参数 + additionalProperties: true + + LLMTestResult: + type: object + description: LLM 测试结果 + required: + - success + properties: + success: + type: boolean + description: 测试是否成功 + response: + type: string + description: LLM 响应内容 + example: "你好!我是一个 AI 助手..." + latency_ms: + type: number + description: 响应延迟(毫秒) + example: 1250.5 + prompt_tokens: + type: integer + description: 输入 Token 数 + example: 15 + completion_tokens: + type: integer + description: 输出 Token 数 + example: 50 + total_tokens: + type: integer + description: 总 Token 数 + example: 65 + model: + type: string + description: 使用的模型 + example: "gpt-4o-mini" + message: + type: string + description: 测试结果消息 + example: "连接成功" + error: + type: string + description: 错误信息(失败时) + example: "API Key 无效" + + RagExperimentRequest: + type: object + description: RAG 实验请求 + required: + - query + properties: + query: + type: string + description: 查询文本 + example: "什么是 RAG?" 
+ kb_ids: + type: array + items: + type: string + description: 知识库 ID 列表 + top_k: + type: integer + description: 检索数量 + default: 5 + score_threshold: + type: number + description: 相似度阈值 + default: 0.5 + generate_response: + type: boolean + description: 是否生成 AI 回复 + default: true + llm_provider: + type: string + description: 指定 LLM 提供者(可选) + example: "openai" + + RagExperimentResult: + type: object + description: RAG 实验结果 + properties: + query: + type: string + description: 原始查询 + retrieval_results: + type: array + items: + $ref: '#/components/schemas/RetrievalResult' + final_prompt: + type: string + description: 最终拼接的 Prompt + ai_response: + $ref: '#/components/schemas/AIResponse' + total_latency_ms: + type: number + description: 总耗时(毫秒) + diagnostics: + type: object + additionalProperties: true + description: 诊断信息 + + RetrievalResult: + type: object + description: 检索结果 + properties: + content: + type: string + description: 检索到的内容 + score: + type: number + description: 相似度分数 + source: + type: string + description: 来源文档 + metadata: + type: object + additionalProperties: true + description: 元数据 + + AIResponse: + type: object + description: AI 回复 + properties: + content: + type: string + description: AI 回复内容 + prompt_tokens: + type: integer + description: 输入 Token 数 + completion_tokens: + type: integer + description: 输出 Token 数 + total_tokens: + type: integer + description: 总 Token 数 + latency_ms: + type: number + description: 生成耗时(毫秒) + model: + type: string + description: 使用的模型 + + /admin/llm/providers: + get: + operationId: listLLMProviders + summary: 获取可用的 LLM 提供者列表 + description: | + [AC-ASA-15] 返回所有支持的 LLM 提供者及其配置参数定义。 + 支持的提供者:OpenAI、Ollama、Azure OpenAI + tags: + - LLM Management + parameters: + - name: X-Tenant-Id + in: header + required: true + schema: + type: string + responses: + '200': + description: 成功返回提供者列表 + content: + application/json: + schema: + type: object + properties: + providers: + type: array + items: + $ref: 
'#/components/schemas/LLMProviderInfo' + '401': + description: 未授权 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + + /admin/llm/config: + get: + operationId: getLLMConfig + summary: 获取当前 LLM 配置 + description: | + [AC-ASA-14] 返回当前激活的 LLM 提供者及其配置参数。 + 敏感字段(如 API Key)会被脱敏显示。 + tags: + - LLM Management + parameters: + - name: X-Tenant-Id + in: header + required: true + schema: + type: string + responses: + '200': + description: 成功返回当前配置 + content: + application/json: + schema: + $ref: '#/components/schemas/LLMConfig' + '401': + description: 未授权 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + put: + operationId: updateLLMConfig + summary: 更新 LLM 配置 + description: | + [AC-ASA-16] 更新 LLM 提供者和配置参数。 + 配置更新后立即生效,无需重启服务。 + tags: + - LLM Management + parameters: + - name: X-Tenant-Id + in: header + required: true + schema: + type: string + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/LLMConfigUpdate' + responses: + '200': + description: 配置更新成功 + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + message: + type: string + '400': + description: 请求参数错误 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + '401': + description: 未授权 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + + /admin/llm/test: + post: + operationId: testLLM + summary: 测试 LLM 连接 + description: | + [AC-ASA-17, AC-ASA-18] 测试 LLM 提供者连接。 + 发送测试提示词并返回响应结果,包含 Token 消耗和延迟统计。 + tags: + - LLM Management + parameters: + - name: X-Tenant-Id + in: header + required: true + schema: + type: string + requestBody: + required: false + content: + application/json: + schema: + type: object + properties: + test_prompt: + type: string + description: 测试提示词(可选) + example: "你好,请简单介绍一下自己。" + provider: + type: string + description: 指定测试的提供者(可选,默认使用当前配置) + config: + $ref: 
'#/components/schemas/LLMConfigUpdate' + responses: + '200': + description: 测试完成 + content: + application/json: + schema: + $ref: '#/components/schemas/LLMTestResult' + '401': + description: 未授权 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + + /admin/rag/experiments/run: + post: + operationId: runRagExperiment + summary: 运行 RAG 实验(含 AI 输出) + description: | + [AC-ASA-05, AC-ASA-19, AC-ASA-21, AC-ASA-22] 运行 RAG 实验。 + 返回检索结果、最终 Prompt 和 AI 回复。 + tags: + - RAG Lab + parameters: + - name: X-Tenant-Id + in: header + required: true + schema: + type: string + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/RagExperimentRequest' + responses: + '200': + description: 实验完成 + content: + application/json: + schema: + $ref: '#/components/schemas/RagExperimentResult' + '401': + description: 未授权 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + + /admin/rag/experiments/stream: + post: + operationId: runRagExperimentStream + summary: 运行 RAG 实验(流式输出) + description: | + [AC-ASA-20] 运行 RAG 实验并以 SSE 流式输出 AI 回复。 + 事件类型:retrieval、prompt、message、final、error + tags: + - RAG Lab + parameters: + - name: X-Tenant-Id + in: header + required: true + schema: + type: string + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/RagExperimentRequest' + responses: + '200': + description: SSE 流式输出 + content: + text/event-stream: + schema: + type: string + '401': + description: 未授权 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' diff --git a/spec/ai-service/progress.md b/spec/ai-service/progress.md index 44a3bb2..b113391 100644 --- a/spec/ai-service/progress.md +++ b/spec/ai-service/progress.md @@ -2,7 +2,7 @@ feature_id: "AISVC" title: "Python AI 中台(ai-service)进度追踪" status: "completed" -version: "0.3.0" +version: "0.4.0" last_updated: "2026-02-24" --- @@ -54,6 +54,66 @@ last_updated: 
"2026-02-24" | Phase 5 | 集成测试 | 100% | ✅ 完成 | | Phase 6 | 前后端联调 | 100% | ✅ 完成 | | Phase 7 | 嵌入模型可插拔与文档解析 | 100% | ✅ 完成 | +| Phase 8 | LLM 配置与 RAG 调试输出 | 100% | ✅ 完成 | + +**测试统计: 184 tests passing** + +--- + +## Phase 8: LLM 配置与 RAG 调试输出(v0.4.0 迭代) + +### 8.1 设计目标 + +- LLM 提供者可插拔设计 +- 支持界面配置不同供应商的 AI +- RAG 实验室支持 AI 输出调试 + +### 8.2 实现详情 (2026-02-24) + +#### LLM 服务实现 +- 创建 LLMProviderFactory 工厂类 (`app/services/llm/factory.py`) +- 支持 OpenAI、Ollama、Azure OpenAI 三种提供者 +- 实现 LLMConfigManager 配置热更新 +- 实现连接测试功能 + +#### API 端点实现 +- GET /admin/llm/providers - 获取 LLM 提供者列表 +- GET /admin/llm/config - 获取当前 LLM 配置 +- PUT /admin/llm/config - 更新 LLM 配置 +- POST /admin/llm/test - 测试 LLM 连接 + +#### RAG 实验增强 +- 更新 POST /admin/rag/experiments/run - 支持 AI 回复生成 +- 新增 POST /admin/rag/experiments/stream - SSE 流式输出 +- 支持 Token 统计和响应耗时 +- 支持指定 LLM 提供者 + +### 8.3 任务进度 + +| 任务 | 描述 | 状态 | +|------|------|------| +| T8.1 | LLMProviderFactory 工厂类 | ✅ 完成 | +| T8.2 | LLMConfigManager 配置管理 | ✅ 完成 | +| T8.3 | GET /admin/llm/providers | ✅ 完成 | +| T8.4 | GET /admin/llm/config | ✅ 完成 | +| T8.5 | PUT /admin/llm/config | ✅ 完成 | +| T8.6 | POST /admin/llm/test | ✅ 完成 | +| T8.7 | RAG 实验支持 AI 回复 | ✅ 完成 | +| T8.8 | RAG 实验流式输出 | ✅ 完成 | +| T8.9 | 支持指定 LLM 提供者 | ✅ 完成 | +| T8.10 | 更新 OpenAPI 契约 | ✅ 完成 | + +--- + +## v0.4.0 完成总结 + +**Phase 8 已全部完成** + +| 模块 | 文件数 | 状态 | +|------|--------|------| +| LLM 服务 | 1 | ✅ | +| API 端点 | 2 | ✅ | +| OpenAPI 契约 | 1 | ✅ | **测试统计: 184 tests passing** diff --git a/spec/ai-service/requirements.md b/spec/ai-service/requirements.md index 629d982..f82d61f 100644 --- a/spec/ai-service/requirements.md +++ b/spec/ai-service/requirements.md @@ -1,8 +1,8 @@ --- feature_id: "AISVC" title: "Python AI 中台(ai-service)需求规范" -status: "draft" -version: "0.3.0" +status: "completed" +version: "0.4.0" owners: - "product" - "backend" @@ -264,3 +264,45 @@ source: | AC-AISVC-39 | /admin/embedding/config | GET | getEmbeddingConfig | 当前配置查询 | | AC-AISVC-40 | /admin/embedding/config | PUT | 
updateEmbeddingConfig | 配置更新 | | AC-AISVC-41 | /admin/embedding/test | POST | testEmbedding | 连接测试 | + +--- + +## 11. 迭代需求:LLM 模型配置与 RAG 调试输出(v0.4.0) + +> 说明:本节为 v0.4.0 迭代新增,用于支持 LLM 模型的界面配置及 RAG 实验室的 AI 输出调试。 + +### 11.1 LLM 模型配置管理 + +- [AC-AISVC-42] WHEN 前端通过 `GET /admin/llm/providers` 获取 LLM 提供者列表 THEN 系统 SHALL 返回所有支持的 LLM 提供者及其配置参数定义。 + +- [AC-AISVC-43] WHEN 前端通过 `GET /admin/llm/config` 获取当前 LLM 配置 THEN 系统 SHALL 返回当前激活的 LLM 提供者及其配置参数(敏感字段脱敏)。 + +- [AC-AISVC-44] WHEN 前端通过 `PUT /admin/llm/config` 更新 LLM 配置 THEN 系统 SHALL 验证配置有效性,更新配置并立即生效。 + +- [AC-AISVC-45] WHEN 前端通过 `POST /admin/llm/test` 测试 LLM 连接 THEN 系统 SHALL 调用 LLM 生成测试回复,返回响应内容、Token 消耗和延迟统计。 + +- [AC-AISVC-46] WHEN LLM 连接测试失败 THEN 系统 SHALL 返回详细错误信息,帮助用户排查配置问题。 + +### 11.2 RAG 实验室 AI 输出增强 + +- [AC-AISVC-47] WHEN 前端通过 `POST /admin/rag/experiments/run` 运行 RAG 实验 THEN 系统 SHALL 返回检索结果、最终 Prompt 和 AI 回复。 + +- [AC-AISVC-48] WHEN 前端通过 `POST /admin/rag/experiments/stream` 运行 RAG 实验 THEN 系统 SHALL 以 SSE 流式输出 AI 回复。 + +- [AC-AISVC-49] WHEN RAG 实验生成 AI 回复 THEN 系统 SHALL 返回 Token 消耗统计和响应耗时。 + +- [AC-AISVC-50] WHEN RAG 实验请求指定 `llm_provider` THEN 系统 SHALL 使用指定的 LLM 提供者生成回复。 + +### 11.3 追踪映射(v0.4.0 迭代) + +| AC ID | Endpoint | 方法 | Operation | 描述 | +|-------|----------|------|-----------|------| +| AC-AISVC-42 | /admin/llm/providers | GET | listLLMProviders | LLM 提供者列表 | +| AC-AISVC-43 | /admin/llm/config | GET | getLLMConfig | 当前 LLM 配置查询 | +| AC-AISVC-44 | /admin/llm/config | PUT | updateLLMConfig | LLM 配置更新 | +| AC-AISVC-45 | /admin/llm/test | POST | testLLM | LLM 连接测试 | +| AC-AISVC-46 | /admin/llm/test | POST | testLLM | LLM 测试失败处理 | +| AC-AISVC-47 | /admin/rag/experiments/run | POST | runRagExperiment | RAG 实验含 AI 输出 | +| AC-AISVC-48 | /admin/rag/experiments/stream | POST | runRagExperimentStream | RAG 实验流式输出 | +| AC-AISVC-49 | /admin/rag/experiments/run | POST | runRagExperiment | Token 统计 | +| AC-AISVC-50 | /admin/rag/experiments/run | POST | runRagExperiment | 指定 LLM 提供者 | diff --git a/spec/ai-service/tasks.md 
b/spec/ai-service/tasks.md
index ccfcce1..a740bf4 100644
--- a/spec/ai-service/tasks.md
+++ b/spec/ai-service/tasks.md
@@ -2,7 +2,7 @@
 feature_id: "AISVC"
 title: "Python AI 中台(ai-service)任务清单"
 status: "completed"
-version: "0.3.0"
+version: "0.4.0"
 last_updated: "2026-02-24"
 ---
 
@@ -83,7 +83,7 @@ last_updated: "2026-02-24"
 
 ## 5. 完成总结
 
-**Phase 1-7 已全部完成**
+**Phase 1-8 已全部完成**
 
 | Phase | 描述 | 任务数 | 状态 |
 |-------|------|--------|------|
@@ -94,8 +94,9 @@ last_updated: "2026-02-24"
 | Phase 5 | 集成测试 | 3 | ✅ 完成 |
 | Phase 6 | 前后端联调真实对接 | 9 | ✅ 完成 |
 | Phase 7 | 嵌入模型可插拔与文档解析 | 21 | ✅ 完成 |
+| Phase 8 | LLM 配置与 RAG 调试输出 | 10 | ✅ 完成 |
 
-**已完成: 53 个任务**
+**已完成: 63 个任务**
 
 ---
 
@@ -121,3 +122,17 @@ last_updated: "2026-02-24"
 - [x] T7.19 编写嵌入服务单元测试 `[AC-AISVC-29, AC-AISVC-30, AC-AISVC-31, AC-AISVC-32]` ✅
 - [x] T7.20 编写文档解析单元测试 `[AC-AISVC-33, AC-AISVC-34, AC-AISVC-35, AC-AISVC-36, AC-AISVC-37]` ✅
 - [x] T7.21 编写嵌入管理 API 集成测试 `[AC-AISVC-38, AC-AISVC-39, AC-AISVC-40, AC-AISVC-41]` ✅
+
+---
+
+### Phase 8: LLM 配置与 RAG 调试输出(v0.4.0 迭代)
+- [x] T8.1 设计 `LLMProviderFactory` 工厂类:支持根据配置动态加载提供者 `[AC-AISVC-42]` ✅
+- [x] T8.2 实现 `LLMConfigManager` 配置管理:支持动态配置与热更新 `[AC-AISVC-43, AC-AISVC-44]` ✅
+- [x] T8.3 实现 `GET /admin/llm/providers` API:返回可用提供者列表 `[AC-AISVC-42]` ✅
+- [x] T8.4 实现 `GET /admin/llm/config` API:返回当前配置 `[AC-AISVC-43]` ✅
+- [x] T8.5 实现 `PUT /admin/llm/config` API:更新配置 `[AC-AISVC-44]` ✅
+- [x] T8.6 实现 `POST /admin/llm/test` API:测试 LLM 连接 `[AC-AISVC-45, AC-AISVC-46]` ✅
+- [x] T8.7 更新 RAG 实验接口:支持 AI 回复生成 `[AC-AISVC-47, AC-AISVC-49]` ✅
+- [x] T8.8 实现 RAG 实验流式输出:SSE 流式 AI 回复 `[AC-AISVC-48]` ✅
+- [x] T8.9 支持指定 LLM 提供者:RAG 实验可选择不同 LLM `[AC-AISVC-50]` ✅
+- [x] T8.10 更新 OpenAPI 契约:添加 LLM 管理和 RAG 实验增强接口 ✅
diff --git a/test-doc.txt b/test-doc.txt
new file mode 100644
index 0000000..59ecf46
--- /dev/null
+++ b/test-doc.txt
@@ -0,0 +1,18 @@
+这是一个测试文档,用于验证RAG检索功能。
+
+世界设定:
+这是一个奇幻世界,名为艾泽拉斯。这个世界由多个大陆组成,包括东部王国、卡利姆多和诺森德。
+
+主要种族:
+1. 人类 - 居住在东部王国,拥有强大的骑士和法师
+2. 
精灵 - 分为暗夜精灵和高等精灵,擅长弓箭和魔法 +3. 矮人 - 居住在山脉中,善于锻造和采矿 +4. 兽人 - 来自外域,拥有强大的战士 + +魔法系统: +这个世界充满了魔法能量,法师可以从空气中汲取魔力施放法术。 +主要魔法类型包括:火焰、冰霜、奥术、暗影和神圣。 + +历史背景: +这个世界经历了多次大战,最近的一次是天灾军团的入侵。 +巫妖王阿尔萨斯率领亡灵大军试图征服整个世界。 \ No newline at end of file