diff --git a/ai-service/app/api/admin/rag.py b/ai-service/app/api/admin/rag.py index 63cd782..e5b46f1 100644 --- a/ai-service/app/api/admin/rag.py +++ b/ai-service/app/api/admin/rag.py @@ -14,6 +14,7 @@ from pydantic import BaseModel, Field from app.core.config import get_settings from app.core.exceptions import MissingTenantIdException +from app.core.prompts import format_evidence_for_prompt, build_user_prompt_with_evidence from app.core.tenant import get_tenant_id from app.models import ErrorResponse from app.services.retrieval.vector_retriever import get_vector_retriever @@ -226,6 +227,11 @@ async def run_rag_experiment_stream( final_prompt = _build_final_prompt(request.query, retrieval_results) + logger.info(f"[AC-ASA-20] ========== RAG LAB STREAM FULL PROMPT ==========") + logger.info(f"[AC-ASA-20] Prompt length: {len(final_prompt)}") + logger.info(f"[AC-ASA-20] Prompt content:\n{final_prompt}") + logger.info(f"[AC-ASA-20] ==============================================") + yield f"event: retrieval\ndata: {json.dumps({'results': retrieval_results, 'count': len(retrieval_results)})}\n\n" yield f"event: prompt\ndata: {json.dumps({'prompt': final_prompt})}\n\n" @@ -270,6 +276,11 @@ async def _generate_ai_response( """ import time + logger.info(f"[AC-ASA-19] ========== RAG LAB FULL PROMPT ==========") + logger.info(f"[AC-ASA-19] Prompt length: {len(prompt)}") + logger.info(f"[AC-ASA-19] Prompt content:\n{prompt}") + logger.info(f"[AC-ASA-19] ==========================================") + try: manager = get_llm_config_manager() client = manager.get_client() @@ -300,33 +311,10 @@ async def _generate_ai_response( def _build_final_prompt(query: str, retrieval_results: list[dict]) -> str: """ Build the final prompt from query and retrieval results. + Uses shared prompt configuration for consistency with orchestrator. """ - if not retrieval_results: - return f"""用户问题:{query} - -未找到相关检索结果,请基于通用知识回答用户问题。""" - - evidence_text = "\n".join([ - f"{i+1}. [Score: {hit['score']:.2f}] {hit['content'][:200]}{'...' if len(hit['content']) > 200 else ''}" - for i, hit in enumerate(retrieval_results[:5]) - ]) - - return f"""【系统指令】 -你是一名经验丰富的客服专员,名字叫“小N”。请遵循以下准则: -1. 使用自然、口语化的中文,语气亲切、有礼貌,像在和人聊天一样。 -2. 不要使用列表、编号或过于机械的结构,而是用连贯的段落表达。 -3. 如果知识库信息不足,可以委婉地说明“我帮您查一下”、“我确认一下”,而不是直接说“我不知道”。 -4. 不要提及“作为AI”、“根据算法”等词,避免暴露机器身份。 -5. 适当使用语气词(如“呢”、“哦”、“哈”)、表情符号(根据品牌调性可选)和人性化的表达(如“我理解”、“您别着急”)。 -6. 回答长度适中,一般不超过3-5句话,如需详细说明可分段落,但保持易读。 -7. 如果问题涉及隐私或敏感信息,请引导客户联系人工客服(如“为了您的账户安全,建议您拨打官方电话...”)。 - -【知识库内容】 -{evidence_text} - -【用户问题】 -{query} -""" + evidence_text = format_evidence_for_prompt(retrieval_results, max_results=5, max_content_length=500) + return build_user_prompt_with_evidence(query, evidence_text) def _get_fallback_results(query: str) -> list[dict]: diff --git a/ai-service/app/core/prompts.py b/ai-service/app/core/prompts.py new file mode 100644 index 0000000..91198b8 --- /dev/null +++ b/ai-service/app/core/prompts.py @@ -0,0 +1,114 @@ +""" +Shared prompt templates for AI Service. +Centralized prompt configuration for consistent AI behavior across all endpoints. +""" + +SYSTEM_PROMPT = """你是一名经验丰富的客服专员,名字叫"小N"。请遵循以下准则: +像朋友一样聊天:使用自然、口语化的中文,语气亲切有礼貌,就像在和一位老朋友聊天。多使用"咱们"、"您看"、"是不是"这样拉近距离的表达。 +避免生硬罗列:介绍多个信息时,不要用"第一、第二"或分点列举,而是用流畅的段落把它们串联起来。比如:"我们这边有几类课程挺受欢迎的,一个是……另外还有……您对哪方面更感兴趣呀?" +主动引导对话:不要一次性把知识库里的所有内容都倒出来。可以先简单概括,然后问问客户具体想了解什么,比如:"您是想提升职场英语,还是有留学方面的打算呢?" 
+委婉处理未知:如果知识库里没有答案,别直接说"不知道",可以说:"您稍等,我帮您确认一下"、"这个问题我需要问问同事,马上给您回复"。 +隐藏机器身份:绝不要提及"作为AI"、"根据算法"等词,也避免使用过于官方的术语。就像普通人一样用"我"、"我们"。 +多用语气词和情感表达:适当加入"呢"、"哦"、"哈"、"呀"等语气词,根据情况使用表情符号(😊)。表达理解和关心,比如"我明白您的意思"、"别着急,我们一起看看"。 +保持简洁但有温度:回答一般控制在3-5句话,如果需要详细说明,可以分成小段落,但每段都要口语化,读起来不累。 +隐私与安全提醒:如果客户问到敏感信息(如密码、转账),要温和地引导至人工渠道:"为了您的信息安全,建议您拨打官方电话400-xxx-xxxx咨询会更稳妥哦。""" + + +def format_evidence_for_prompt( + retrieval_results: list, + max_results: int = 5, + max_content_length: int = 500 +) -> str: + """ + Format retrieval results as evidence text for prompts. + + Args: + retrieval_results: List of retrieval hits. Can be: + - dict format: {'content', 'score', 'source', 'metadata'} + - RetrievalHit object: with .text, .score, .source, .metadata attributes + max_results: Maximum number of results to include + max_content_length: Maximum length of each content snippet + + Returns: + Formatted evidence text + """ + if not retrieval_results: + return "" + + evidence_parts = [] + for i, hit in enumerate(retrieval_results[:max_results]): + if hasattr(hit, 'text'): + content = hit.text + score = hit.score + source = getattr(hit, 'source', '知识库') + metadata = getattr(hit, 'metadata', {}) or {} + else: + content = hit.get('content', '') + score = hit.get('score', 0) + source = hit.get('source', '知识库') + metadata = hit.get('metadata', {}) or {} + + if len(content) > max_content_length: + content = content[:max_content_length] + '...' + + nested_meta = metadata.get('metadata', {}) + source_doc = nested_meta.get('source_doc', source) if nested_meta else source + category = nested_meta.get('category', '') if nested_meta else '' + department = nested_meta.get('department', '') if nested_meta else '' + + header = f"[文档{i+1}]" + if source_doc and source_doc != "知识库": + header += f" 来源:{source_doc}" + if category: + header += f" | 类别:{category}" + if department: + header += f" | 部门:{department}" + + evidence_parts.append(f"{header}\n相关度:{score:.2f}\n内容:{content}") + + return "\n\n".join(evidence_parts) + + +def build_system_prompt_with_evidence(evidence_text: str) -> str: + """ + Build system prompt with knowledge base evidence. + + Args: + evidence_text: Formatted evidence from retrieval results + + Returns: + Complete system prompt + """ + if not evidence_text: + return SYSTEM_PROMPT + + return f"""{SYSTEM_PROMPT} + +知识库参考内容: +{evidence_text}""" + + +def build_user_prompt_with_evidence(query: str, evidence_text: str) -> str: + """ + Build user prompt with knowledge base evidence (for single-message format). 
+ + Args: + query: User's question + evidence_text: Formatted evidence from retrieval results + + Returns: + Complete user prompt + """ + if not evidence_text: + return f"""用户问题:{query} + +未找到相关检索结果,请基于通用知识回答用户问题。""" + + return f"""【系统指令】 +{SYSTEM_PROMPT} + +【知识库内容】 +{evidence_text} + +【用户问题】 +{query}""" diff --git a/ai-service/app/services/llm/openai_client.py b/ai-service/app/services/llm/openai_client.py index 882ef93..fc36959 100644 --- a/ai-service/app/services/llm/openai_client.py +++ b/ai-service/app/services/llm/openai_client.py @@ -133,6 +133,13 @@ class OpenAIClient(LLMClient): body = self._build_request_body(messages, effective_config, stream=False, **kwargs) logger.info(f"[AC-AISVC-02] Generating response with model={effective_config.model}") + logger.info(f"[AC-AISVC-02] ========== FULL PROMPT TO AI ==========") + for i, msg in enumerate(messages): + role = msg.get("role", "unknown") + content = msg.get("content", "") + logger.info(f"[AC-AISVC-02] [{i}] role={role}, content_length={len(content)}") + logger.info(f"[AC-AISVC-02] [{i}] content:\n{content}") + logger.info(f"[AC-AISVC-02] ======================================") try: response = await client.post( @@ -213,6 +220,13 @@ class OpenAIClient(LLMClient): body = self._build_request_body(messages, effective_config, stream=True, **kwargs) logger.info(f"[AC-AISVC-06] Starting streaming generation with model={effective_config.model}") + logger.info(f"[AC-AISVC-06] ========== FULL PROMPT TO AI (STREAMING) ==========") + for i, msg in enumerate(messages): + role = msg.get("role", "unknown") + content = msg.get("content", "") + logger.info(f"[AC-AISVC-06] [{i}] role={role}, content_length={len(content)}") + logger.info(f"[AC-AISVC-06] [{i}] content:\n{content}") + logger.info(f"[AC-AISVC-06] ======================================") try: async with client.stream( diff --git a/ai-service/app/services/orchestrator.py b/ai-service/app/services/orchestrator.py index 79d0d7c..42d16dc 100644 --- a/ai-service/app/services/orchestrator.py +++ b/ai-service/app/services/orchestrator.py @@ -25,6 +25,7 @@ from typing import Any, AsyncGenerator from sse_starlette.sse import ServerSentEvent from app.core.config import get_settings +from app.core.prompts import SYSTEM_PROMPT, format_evidence_for_prompt from app.core.sse import ( create_error_event, create_final_event, @@ -41,16 +42,6 @@ from app.services.retrieval.base import BaseRetriever, RetrievalContext, Retriev logger = logging.getLogger(__name__) -OPTIMIZED_SYSTEM_PROMPT = """你是学校智能客服助手,基于提供的知识库内容回答用户问题。 - -回答要求: -1. 严格基于提供的知识库内容回答,不要编造信息 -2. 如果知识库中没有相关信息,明确告知用户并建议转人工或稍后重试 -3. 保持专业、友好的语气,回答简洁明了,突出重点 -4. 如果引用知识库内容,请注明来源(如:根据[文档1]...) -5. 
对于时效性问题,请提醒用户注意文档的有效期""" - - @dataclass class OrchestratorConfig: """ @@ -59,7 +50,7 @@ class OrchestratorConfig: """ max_history_tokens: int = 4000 max_evidence_tokens: int = 2000 - system_prompt: str = OPTIMIZED_SYSTEM_PROMPT + system_prompt: str = SYSTEM_PROMPT enable_rag: bool = True use_optimized_retriever: bool = True @@ -408,32 +399,23 @@ class OrchestratorService: f"system_len={len(system_content)}, history_count={len(ctx.merged_context.messages) if ctx.merged_context else 0}" ) logger.debug(f"[AC-AISVC-02] System prompt preview: {system_content[:500]}...") + + logger.info(f"[AC-AISVC-02] ========== ORCHESTRATOR FULL PROMPT ==========") + for i, msg in enumerate(messages): + role = msg.get("role", "unknown") + content = msg.get("content", "") + logger.info(f"[AC-AISVC-02] [{i}] role={role}, content_length={len(content)}") + logger.info(f"[AC-AISVC-02] [{i}] content:\n{content}") + logger.info(f"[AC-AISVC-02] ==============================================") return messages def _format_evidence(self, retrieval_result: RetrievalResult) -> str: """ [AC-AISVC-17] Format retrieval hits as evidence text. - Optimized format with source attribution and metadata. + Uses shared prompt configuration for consistency. """ - evidence_parts = [] - for i, hit in enumerate(retrieval_result.hits[:5], 1): - metadata = hit.metadata or {} - source = metadata.get("metadata", {}).get("source_doc", "知识库") - category = metadata.get("metadata", {}).get("category", "") - department = metadata.get("metadata", {}).get("department", "") - - header = f"[文档{i}]" - if source and source != "知识库": - header += f" 来源:{source}" - if category: - header += f" | 类别:{category}" - if department: - header += f" | 部门:{department}" - - evidence_parts.append(f"{header}\n相关度:{hit.score:.2f}\n内容:{hit.text}") - - return "\n\n".join(evidence_parts) + return format_evidence_for_prompt(retrieval_result.hits, max_results=5, max_content_length=500) def _fallback_response(self, ctx: GenerationContext) -> str: """
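
A minimal usage sketch of the shared helpers added in app/core/prompts.py, for reviewers who want to exercise them from a quick script or test inside ai-service. The sample hits and the standalone-script context are illustrative assumptions, not part of this change, and whether the orchestrator calls build_system_prompt_with_evidence directly is not shown in this diff:

from app.core.prompts import (
    build_system_prompt_with_evidence,
    build_user_prompt_with_evidence,
    format_evidence_for_prompt,
)

# Dict-shaped hits, matching what the RAG Lab passes via _build_final_prompt.
# Objects with .text/.score/.metadata attributes (e.g. RetrievalHit) are also accepted.
hits = [
    {
        "content": "雅思冲刺班每周三次课,包含口语一对一辅导。",  # illustrative sample data
        "score": 0.87,
        "metadata": {"metadata": {"source_doc": "课程手册.pdf", "category": "课程"}},
    },
    {"content": "留学咨询服务覆盖英美澳,可预约顾问面谈。", "score": 0.74},
]

# Same truncation settings the endpoints use in this change.
evidence = format_evidence_for_prompt(hits, max_results=5, max_content_length=500)

# Single-message shape (RAG Lab): persona, evidence, and query folded into one user prompt.
user_prompt = build_user_prompt_with_evidence("有哪些英语课程?", evidence)

# System/user split: evidence appended to the shared persona prompt instead.
system_prompt = build_system_prompt_with_evidence(evidence)

print(user_prompt)
print(system_prompt)

The design intent, per the new module's docstring, is that both the RAG Lab endpoints and the orchestrator draw the persona and evidence formatting from this one place, so prompt wording only has to be changed once to stay consistent across endpoints.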