ai-robot-core/ai-service/app/api/admin/kb_optimized.py

331 lines
10 KiB
Python

"""
Knowledge base management API with RAG optimization features.
Reference: rag-optimization/spec.md Section 4.2
"""
import logging
from datetime import date
from typing import Any
from fastapi import APIRouter, Depends, HTTPException, status
from pydantic import BaseModel, Field
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.database import get_session
from app.services.retrieval import (
ChunkMetadata,
ChunkMetadataModel,
IndexingProgress,
IndexingResult,
KnowledgeIndexer,
MetadataFilter,
RetrievalStrategy,
get_knowledge_indexer,
)
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/api/kb", tags=["Knowledge Base"])
class IndexDocumentRequest(BaseModel):
    """Request body for indexing a single document.

    Carries the owning tenant, the document identifier, the raw text to be
    indexed and an optional metadata object that the ``/index`` handler
    converts into a ``ChunkMetadata`` instance.
    """

    # Required identifiers and content.
    tenant_id: str = Field(default=..., description="Tenant ID")
    document_id: str = Field(default=..., description="Document ID")
    text: str = Field(default=..., description="Document text content")

    # Optional; when omitted the document is indexed without metadata.
    metadata: ChunkMetadataModel | None = Field(
        default=None,
        description="Document metadata",
    )
class IndexDocumentResponse(BaseModel):
    """Outcome of a document indexing call.

    The ``/index`` handler fills every field from the attribute of the same
    name on the result object returned by the indexer.
    """

    # Overall outcome flag reported by the indexer.
    success: bool

    # Chunk counters reported by the indexer.
    total_chunks: int
    indexed_chunks: int
    failed_chunks: int

    # Duration reported by the indexer, in seconds.
    elapsed_seconds: float

    # Error text reported by the indexer; defaults to None.
    error_message: str | None = None
class IndexingProgressResponse(BaseModel):
    """Snapshot of the indexer's current progress.

    The ``/index/progress`` handler copies every field from the attribute of
    the same name on the progress object exposed by the indexer.
    """

    # Chunk counters.
    total_chunks: int
    processed_chunks: int
    failed_chunks: int

    # Completion as an integer percentage.
    progress_percent: int

    # Time spent so far, in seconds.
    elapsed_seconds: float

    # Document the indexer reports as currently being processed.
    current_document: str
class MetadataFilterRequest(BaseModel):
    """Optional metadata constraints attached to a retrieval request.

    The ``/retrieve`` handler dumps this model with ``exclude_none=True`` and
    passes the remaining fields as keyword arguments to ``MetadataFilter``,
    so any field left as ``None`` is simply not forwarded.
    """

    # List-valued constraints; None means "not specified".
    categories: list[str] | None = None
    target_audiences: list[str] | None = None
    departments: list[str] | None = None

    # Always forwarded because it is never None.
    # NOTE(review): presumably limits results to currently valid chunks;
    # confirm against MetadataFilter.
    valid_only: bool = True

    # Optional priority threshold and keyword list.
    min_priority: int | None = None
    keywords: list[str] | None = None
class RetrieveRequest(BaseModel):
    """Request body for the knowledge retrieval endpoint."""

    # Required: which tenant to search and what to search for.
    tenant_id: str = Field(default=..., description="Tenant ID")
    query: str = Field(default=..., description="Search query")

    # Result count, validated to the inclusive range 1..50 (default 10).
    top_k: int = Field(
        default=10,
        ge=1,
        le=50,
        description="Number of results",
    )

    # Optional metadata constraints; None disables filtering.
    filters: MetadataFilterRequest | None = Field(
        default=None,
        description="Metadata filters",
    )

    # Retrieval strategy; hybrid unless the caller says otherwise.
    strategy: RetrievalStrategy = Field(
        default=RetrievalStrategy.HYBRID,
        description="Retrieval strategy",
    )
class RetrieveResponse(BaseModel):
    """Result of a knowledge retrieval call."""

    # One dict per hit; the /retrieve handler emits the keys
    # "text", "score", "source" and "metadata".
    hits: list[dict[str, Any]]

    # Hit count and best score as reported by the retriever result.
    total_hits: int
    max_score: float

    # Taken from the "is_insufficient" entry of the diagnostics mapping.
    is_insufficient: bool

    # Diagnostics mapping passed through from the retriever result.
    diagnostics: dict[str, Any]
class MetadataOptionsResponse(BaseModel):
    """Selectable metadata values offered to clients for building filters."""

    # Allowed string values for each metadata dimension.
    categories: list[str]
    departments: list[str]
    target_audiences: list[str]

    # Allowed priority levels.
    priorities: list[int]
@router.post("/index", response_model=IndexDocumentResponse)
async def index_document(
    request: IndexDocumentRequest,
    session: AsyncSession = Depends(get_session),
):
    """
    Index a document with optimized embedding.

    Features:
    - Task prefixes (search_document:) for document embedding
    - Multi-dimensional vectors (256/512/768)
    - Metadata support

    Returns:
        IndexDocumentResponse populated from the indexer's result object.

    Raises:
        HTTPException: 500 when anything fails while indexing.
    """
    # NOTE: `session` is not used in the body; it is kept so the endpoint's
    # dependency signature stays unchanged.
    try:
        indexer = get_knowledge_indexer()

        # Convert the API-level metadata model into the indexer's own
        # metadata type, field by field.
        chunk_metadata = None
        if request.metadata:
            meta = request.metadata
            chunk_metadata = ChunkMetadata(
                category=meta.category,
                subcategory=meta.subcategory,
                target_audience=meta.target_audience,
                source_doc=meta.source_doc,
                source_url=meta.source_url,
                department=meta.department,
                priority=meta.priority,
                keywords=meta.keywords,
            )

        result = await indexer.index_document(
            tenant_id=request.tenant_id,
            document_id=request.document_id,
            text=request.text,
            metadata=chunk_metadata,
        )

        return IndexDocumentResponse(
            success=result.success,
            total_chunks=result.total_chunks,
            indexed_chunks=result.indexed_chunks,
            failed_chunks=result.failed_chunks,
            elapsed_seconds=result.elapsed_seconds,
            error_message=result.error_message,
        )
    except Exception as e:
        # logger.exception records the traceback, which logger.error dropped.
        logger.exception("[KB-API] Failed to index document: %s", e)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"索引失败: {str(e)}",
        ) from e
@router.get("/index/progress", response_model=IndexingProgressResponse | None)
async def get_indexing_progress():
    """
    Get current indexing progress.

    Returns:
        IndexingProgressResponse built from the indexer's progress object,
        or None when the indexer reports no progress.

    Raises:
        HTTPException: 500 when the progress cannot be read.
    """
    try:
        indexer = get_knowledge_indexer()
        progress = indexer.get_progress()

        # The indexer returns None when it has no progress to report.
        if progress is None:
            return None

        return IndexingProgressResponse(
            total_chunks=progress.total_chunks,
            processed_chunks=progress.processed_chunks,
            failed_chunks=progress.failed_chunks,
            progress_percent=progress.progress_percent,
            elapsed_seconds=progress.elapsed_seconds,
            current_document=progress.current_document,
        )
    except Exception as e:
        # logger.exception records the traceback, which logger.error dropped.
        logger.exception("[KB-API] Failed to get progress: %s", e)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"获取进度失败: {str(e)}",
        ) from e
@router.post("/retrieve", response_model=RetrieveResponse)
async def retrieve_knowledge(request: RetrieveRequest):
    """
    Retrieve knowledge using optimized RAG.

    Strategies:
    - vector: Simple vector search
    - bm25: BM25 keyword search
    - hybrid: RRF combination of vector + BM25 (default)
    - two_stage: Two-stage retrieval with Matryoshka dimensions

    Returns:
        RetrieveResponse with the hits, hit count, best score and the
        retriever's diagnostics.

    Raises:
        HTTPException: 500 when retrieval fails for any reason.
    """
    try:
        # Function-scope imports kept inside the try block so that an import
        # failure is still reported as a 500 with the retrieval error text.
        from app.services.retrieval.optimized_retriever import get_optimized_retriever
        from app.services.retrieval.base import RetrievalContext

        retriever = await get_optimized_retriever()

        # Only fields the caller actually set are forwarded to MetadataFilter.
        metadata_filter = None
        if request.filters is not None:
            filter_kwargs = request.filters.model_dump(exclude_none=True)
            metadata_filter = MetadataFilter(**filter_kwargs)

        # NOTE(review): request.top_k and request.strategy are validated on
        # the request model but never passed to the retriever here; confirm
        # how RetrievalContext / the retriever expect to receive them.
        ctx = RetrievalContext(
            tenant_id=request.tenant_id,
            query=request.query,
        )
        if metadata_filter:
            ctx.metadata = {"filter": metadata_filter.to_qdrant_filter()}

        result = await retriever.retrieve(ctx)

        # Normalize once: previously `.get()` was called on the raw value
        # while only the `diagnostics=` argument was guarded with `or {}`,
        # so a None diagnostics raised AttributeError.
        diagnostics = result.diagnostics or {}

        hits = [
            {
                "text": hit.text,
                "score": hit.score,
                "source": hit.source,
                "metadata": hit.metadata,
            }
            for hit in result.hits
        ]

        return RetrieveResponse(
            hits=hits,
            total_hits=result.hit_count,
            max_score=result.max_score,
            is_insufficient=diagnostics.get("is_insufficient", False),
            diagnostics=diagnostics,
        )
    except Exception as e:
        # logger.exception records the traceback, which logger.error dropped.
        logger.exception("[KB-API] Failed to retrieve: %s", e)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"检索失败: {str(e)}",
        ) from e
@router.get("/metadata/options", response_model=MetadataOptionsResponse)
async def get_metadata_options():
    """
    Return the metadata values clients may use when filtering.

    The values are hard-coded in this handler; they would typically be
    loaded from a database.
    """
    try:
        category_options = [
            "课程咨询",
            "考试政策",
            "学籍管理",
            "奖助学金",
            "宿舍管理",
            "校园服务",
            "就业指导",
            "其他",
        ]
        department_options = [
            "教务处",
            "学生处",
            "财务处",
            "后勤处",
            "就业指导中心",
            "图书馆",
            "信息中心",
        ]
        audience_options = [
            "本科生",
            "研究生",
            "留学生",
            "新生",
            "毕业生",
            "教职工",
        ]
        # Priority levels 1 through 10 inclusive.
        priority_levels = [*range(1, 11)]

        return MetadataOptionsResponse(
            categories=category_options,
            departments=department_options,
            target_audiences=audience_options,
            priorities=priority_levels,
        )
    except Exception as e:
        logger.error(f"[KB-API] Failed to get metadata options: {e}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"获取选项失败: {str(e)}"
        )
@router.post("/reindex")
async def reindex_all(
    tenant_id: str,
    session: AsyncSession = Depends(get_session),
):
    """
    Reindex all documents for a tenant with optimized embedding.

    Selects the tenant's documents whose status is COMPLETED, reads each
    document's file as UTF-8 text and passes it to the knowledge indexer.
    Documents without a file path, or whose file does not exist, are skipped.

    Returns:
        Dict with "success", "total_documents" (all selected documents,
        including skipped ones), "skipped_documents", "total_indexed" and
        "total_failed" (chunk counts summed over the indexed documents).

    Raises:
        HTTPException: 500 when the query, a file read or indexing fails.
    """
    # Hoisted: the original re-ran this import on every loop iteration.
    import os

    try:
        from app.models.entities import Document, DocumentStatus

        stmt = select(Document).where(
            Document.tenant_id == tenant_id,
            Document.status == DocumentStatus.COMPLETED.value,
        )
        query_result = await session.execute(stmt)
        documents = query_result.scalars().all()

        indexer = get_knowledge_indexer()
        total_indexed = 0
        total_failed = 0
        skipped = 0

        for doc in documents:
            # Guard clause: nothing to index without a readable source file.
            if not doc.file_path or not os.path.exists(doc.file_path):
                skipped += 1
                logger.warning(
                    "[KB-API] Skipping document %s during reindex: no file at %r",
                    doc.id,
                    doc.file_path,
                )
                continue

            # NOTE(review): this is a synchronous read inside an async
            # handler; consider offloading it if the files can be large.
            with open(doc.file_path, "r", encoding="utf-8") as f:
                text = f.read()

            # Separate name from the DB result above to avoid shadowing.
            index_result = await indexer.index_document(
                tenant_id=tenant_id,
                document_id=str(doc.id),
                text=text,
            )
            total_indexed += index_result.indexed_chunks
            total_failed += index_result.failed_chunks

        return {
            "success": True,
            "total_documents": len(documents),
            "skipped_documents": skipped,
            "total_indexed": total_indexed,
            "total_failed": total_failed,
        }
    except Exception as e:
        # logger.exception records the traceback, which logger.error dropped.
        logger.exception("[KB-API] Failed to reindex: %s", e)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"重新索引失败: {str(e)}",
        ) from e