From 02f03a3a1221de7f8fe533fc813a07fc2a1c132d Mon Sep 17 00:00:00 2001 From: MerCry Date: Wed, 25 Feb 2026 23:42:31 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20RAG=20=E9=85=8D=E7=BD=AE=E4=BC=98?= =?UTF-8?q?=E5=8C=96=E4=B8=8E=E6=A3=80=E7=B4=A2=E6=97=A5=E5=BF=97=E5=A2=9E?= =?UTF-8?q?=E5=BC=BA=20[AC-AISVC-16,=20AC-AISVC-17]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ai-service-admin/src/api/rag.ts | 4 ++++ ai-service/app/core/config.py | 9 +++++++- .../services/retrieval/vector_retriever.py | 21 +++++++++++++++++-- 3 files changed, 31 insertions(+), 3 deletions(-) diff --git a/ai-service-admin/src/api/rag.ts b/ai-service-admin/src/api/rag.ts index cd6c495..12ca036 100644 --- a/ai-service-admin/src/api/rag.ts +++ b/ai-service-admin/src/api/rag.ts @@ -1,4 +1,5 @@ import request from '@/utils/request' +import { useTenantStore } from '@/stores/tenant' export interface AIResponse { content: string @@ -73,6 +74,8 @@ export function createSSEConnection( const baseUrl = import.meta.env.VITE_APP_BASE_API || '/api' const fullUrl = `${baseUrl}${url}` + const tenantStore = useTenantStore() + const controller = new AbortController() fetch(fullUrl, { @@ -80,6 +83,7 @@ export function createSSEConnection( headers: { 'Content-Type': 'application/json', 'Accept': 'text/event-stream', + 'X-Tenant-Id': tenantStore.currentTenantId || '', }, body: JSON.stringify(body), signal: controller.signal diff --git a/ai-service/app/core/config.py b/ai-service/app/core/config.py index bc6ffc9..6fcadec 100644 --- a/ai-service/app/core/config.py +++ b/ai-service/app/core/config.py @@ -44,9 +44,16 @@ class Settings(BaseSettings): ollama_embedding_model: str = "nomic-embed-text" rag_top_k: int = 5 - rag_score_threshold: float = 0.3 + rag_score_threshold: float = 0.01 rag_min_hits: int = 1 rag_max_evidence_tokens: int = 2000 + + rag_two_stage_enabled: bool = True + rag_two_stage_expand_factor: int = 10 + rag_hybrid_enabled: bool = True + rag_rrf_k: int = 60 + rag_vector_weight: float = 0.7 + rag_bm25_weight: float = 0.3 confidence_low_threshold: float = 0.5 confidence_high_threshold: float = 0.8 diff --git a/ai-service/app/services/retrieval/vector_retriever.py b/ai-service/app/services/retrieval/vector_retriever.py index e63de6c..eba3fa0 100644 --- a/ai-service/app/services/retrieval/vector_retriever.py +++ b/ai-service/app/services/retrieval/vector_retriever.py @@ -61,20 +61,31 @@ class VectorRetriever(BaseRetriever): RetrievalResult with filtered hits. """ logger.info( - f"[AC-AISVC-16] Starting vector retrieval for tenant={ctx.tenant_id}, query={ctx.query[:50]}..." + f"[AC-AISVC-16] Starting vector retrieval for tenant={ctx.tenant_id}, " + f"query={ctx.query[:50]}..." + ) + logger.info( + f"[AC-AISVC-16] Retrieval config: top_k={self._top_k}, " + f"score_threshold={self._score_threshold}, min_hits={self._min_hits}" ) try: client = await self._get_client() + logger.info(f"[AC-AISVC-16] Got Qdrant client: {type(client).__name__}") + logger.info("[AC-AISVC-16] Generating embedding for query...") query_vector = await self._get_embedding(ctx.query) + logger.info(f"[AC-AISVC-16] Embedding generated: dim={len(query_vector)}") + logger.info(f"[AC-AISVC-16] Searching in tenant collection: tenant_id={ctx.tenant_id}") hits = await client.search( tenant_id=ctx.tenant_id, query_vector=query_vector, limit=self._top_k, score_threshold=self._score_threshold, ) + + logger.info(f"[AC-AISVC-16] Search returned {len(hits)} raw hits") retrieval_hits = [ RetrievalHit( @@ -104,6 +115,12 @@ class VectorRetriever(BaseRetriever): f"[AC-AISVC-17] Retrieval complete: {len(retrieval_hits)} hits, " f"insufficient={is_insufficient}, max_score={diagnostics['max_score']:.3f}" ) + + if len(retrieval_hits) == 0: + logger.warning( + f"[AC-AISVC-17] No hits found! tenant={ctx.tenant_id}, " + f"query={ctx.query[:50]}..., raw_hits={len(hits)}, threshold={self._score_threshold}" + ) return RetrievalResult( hits=retrieval_hits, @@ -111,7 +128,7 @@ class VectorRetriever(BaseRetriever): ) except Exception as e: - logger.error(f"[AC-AISVC-16] Retrieval error: {e}") + logger.error(f"[AC-AISVC-16] Retrieval error: {e}", exc_info=True) return RetrievalResult( hits=[], diagnostics={"error": str(e), "is_insufficient": True},