"""Check the Qdrant vector database status and knowledge-base contents.

Run directly as a script. It lists all collections, inspects the ones
belonging to the configured tenant, prints a few sample points from each,
and finally runs a sample vector search against every tenant collection.
"""

import asyncio
import sys
import traceback
from pathlib import Path

# Make the directory above this script's directory importable so that the
# ``app`` package resolves when the file is executed directly.
sys.path.insert(0, str(Path(__file__).parent.parent))

from app.core.config import get_settings
from app.core.qdrant_client import get_qdrant_client

# Tenant being inspected.
TENANT_ID = "szmp@ash@2026"
# Substring used to pick this tenant's collections out of all collections.
# NOTE(review): looks like TENANT_ID with "@" replaced by "_" — confirm the
# naming rule before reusing this script for another tenant.
TENANT_COLLECTION_MARKER = "szmp_ash_2026"
# Query text embedded for the sample vector search.
SEARCH_QUERY = "课程"

_RULE = "=" * 60


def _preview(payload: dict, limit: int) -> str:
    """Return the payload's ``text`` shortened to at most ``limit`` characters.

    Returns ``"N/A"`` when the text is missing or empty. The ellipsis is
    appended only when the text was actually cut, so a short text is not
    displayed as if it had been truncated.
    """
    text = payload.get("text")
    if not text:
        return "N/A"
    # Coerce defensively: a non-string payload value must not abort the run.
    text = str(text)
    if len(text) > limit:
        return text[:limit] + "..."
    return text


async def _inspect_collection(client, collection_name: str) -> None:
    """Print summary information and up to three sample points of a collection."""
    print(f"\n{_RULE}")
    print(f"Collection: {collection_name}")
    print(_RULE)

    # Fetch collection-level information.
    collection_info = await client.get_collection(collection_name)
    print(f" Points count: {collection_info.points_count}")
    # NOTE(review): ``vectors_count`` is deprecated in Qdrant and may be
    # absent from newer client models — read it defensively so a missing
    # field prints ``None`` instead of raising.
    print(f" Vectors count: {getattr(collection_info, 'vectors_count', None)}")
    print(f" Status: {collection_info.status}")

    if collection_info.points_count == 0:
        print(" ⚠️ Collection is empty!")
        return

    # Scroll through the first few points (payload only, no vectors).
    print("\n 前 3 条数据:")
    points, _ = await client.scroll(
        collection_name=collection_name,
        limit=3,
        with_payload=True,
        with_vectors=False,
    )

    for i, point in enumerate(points, 1):
        payload = point.payload or {}
        print(f"\n Point {i}:")
        print(f" ID: {point.id}")
        print(f" KB ID: {payload.get('kb_id', 'N/A')}")
        print(f" Text: {_preview(payload, 100)}")
        print(f" Metadata: {payload.get('metadata', {})}")


async def _search_collection(client, collection_name: str, query_vector) -> None:
    """Run the sample vector search against one collection and print the hits.

    A failure is reported and swallowed so that the remaining collections
    are still searched.
    """
    print(f"\n搜索 collection: {collection_name}")
    try:
        search_results = await client.query_points(
            collection_name=collection_name,
            query=query_vector,
            using="full",  # search against the named vector "full"
            limit=3,
            with_payload=True,
        )
        print(f" Search results: {len(search_results.points)}")
        for i, result in enumerate(search_results.points, 1):
            text = _preview(result.payload or {}, 80)
            print(f" {i}. [score={result.score:.4f}] {text}")
    except Exception as e:
        print(f" ❌ Search error: {e}")


async def check_qdrant():
    """Check the Qdrant status for the configured tenant.

    Prints the connection settings, lists the available collections,
    inspects every collection whose name contains
    ``TENANT_COLLECTION_MARKER`` and then performs a sample vector search
    on each of them. Any exception is reported with a traceback rather than
    propagated.
    """
    settings = get_settings()

    print(f"Database URL: {settings.database_url}")
    print(f"Qdrant URL: {settings.qdrant_url}")
    print(f"Tenant ID: {TENANT_ID}")
    print()

    try:
        qdrant_manager = await get_qdrant_client()
        client = await qdrant_manager.get_client()

        # List all collections known to the server.
        collections = (await client.get_collections()).collections
        collection_names = [c.name for c in collections]
        print(f"Available collections: {collection_names}")
        print()

        # Keep only the collections that belong to this tenant.
        tenant_collections = [
            name for name in collection_names if TENANT_COLLECTION_MARKER in name
        ]
        print(f"Tenant collections: {tenant_collections}")
        print()

        # Inspect each tenant collection.
        for collection_name in tenant_collections:
            await _inspect_collection(client, collection_name)

        # Try a vector search.
        print(f"\n\n{_RULE}")
        print(f"尝试向量搜索 (query='{SEARCH_QUERY}'):")
        print(_RULE)

        # Imported inside the ``try`` so that an import failure is reported
        # by the handler below, after the collection report has been printed.
        from app.services.embedding.factory import get_embedding_provider

        embedding_provider = await get_embedding_provider()
        query_vector = await embedding_provider.embed(SEARCH_QUERY)
        print(f"Query vector dimension: {len(query_vector)}")

        for collection_name in tenant_collections:
            await _search_collection(client, collection_name, query_vector)

    except Exception as e:
        # Top-level boundary of a diagnostic script: report and exit normally.
        print(f"❌ Error: {e}")
        traceback.print_exc()


if __name__ == "__main__":
    asyncio.run(check_qdrant())