"""
View the contents of a specific knowledge base.
"""

import asyncio
import sys
import traceback
from pathlib import Path

# Put the directory two levels above this file on sys.path so the `app`
# package import below resolves when the script is run directly.
sys.path.insert(0, str(Path(__file__).parent.parent))

from qdrant_client import AsyncQdrantClient
from app.core.config import get_settings


async def check_kb_content(
    *,
    kb_id: str = "8559ebc9-bfaf-4211-8fe3-ee2b22a5e29c",
    collection_name: str = "kb_szmp_ash_2026_8559ebc9",
    page_size: int = 10,
) -> None:
    """Print every point stored in a knowledge base's Qdrant collection.

    Connects to the Qdrant instance at ``settings.qdrant_url``, reports
    whether the collection exists and how many points it holds, then scrolls
    through the collection page by page and prints each point's id, filename,
    the first 100 characters of its text and its metadata.

    Any exception raised while talking to Qdrant is caught, printed together
    with its traceback, and not re-raised. The client is always closed.

    Args:
        kb_id: Knowledge-base identifier; only used in the printed header.
        collection_name: Name of the Qdrant collection to inspect.
        page_size: Number of points requested per scroll call.
    """
    # NOTE(review): the default collection name looks like it was derived from
    # tenant "szmp@ash@2026" plus the first 8 characters of the KB id; confirm
    # against the application's collection-naming helper.
    settings = get_settings()
    client = AsyncQdrantClient(url=settings.qdrant_url)

    print("=" * 80)
    print(f"查看知识库: {kb_id}")
    print(f"Collection: {collection_name}")
    print("=" * 80)

    try:
        # Bail out early when the collection does not exist.
        exists = await client.collection_exists(collection_name)
        print(f"\nCollection 存在: {exists}")
        if not exists:
            print("Collection 不存在!")
            return

        # Summary information about the collection.
        info = await client.get_collection(collection_name)
        print("\nCollection 信息:")
        print(f" 向量数: {info.points_count}")

        # Scroll through all points, one page at a time.
        print("\n文档内容:")
        print("-" * 80)

        offset = None
        total = 0
        while True:
            points, next_offset = await client.scroll(
                collection_name=collection_name,
                limit=page_size,
                offset=offset,
                with_payload=True,
            )
            if not points:
                break

            for point in points:
                total += 1
                payload = point.payload or {}

                # A stored text of None (or any non-string value) must not
                # abort the whole scan with a TypeError when sliced.
                raw_text = payload.get('text', 'N/A')
                if raw_text is None:
                    raw_text = 'N/A'
                text = str(raw_text)[:100]

                metadata = payload.get('metadata', {})
                filename = payload.get('filename', 'N/A')

                print(f"\n [{total}] ID: {point.id}")
                print(f" Filename: {filename}")
                print(f" Text: {text}...")
                print(f" Metadata: {metadata}")

            # A next-page offset of None means the last page has been read.
            if next_offset is None:
                break
            offset = next_offset

        print(f"\n总计 {total} 条记录")

    except Exception as e:
        # Top-level boundary of a diagnostic script: report and carry on to
        # the cleanup below instead of crashing.
        print(f"\n错误: {e}")
        traceback.print_exc()
    finally:
        await client.close()


if __name__ == "__main__":
    # Run the inspection only when executed as a script, not on import.
    asyncio.run(check_kb_content())