feat: add metadata validation in KB upload and unify metadata storage [AC-IDSMETA-15, AC-IDSMETA-16]
This commit is contained in:
parent
c432f457b8
commit
d3ae92dec5
|
|
@ -7,7 +7,7 @@ Knowledge Base management endpoints.
|
||||||
import logging
|
import logging
|
||||||
import uuid
|
import uuid
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import Annotated, Optional
|
from typing import Annotated, Any, Optional
|
||||||
|
|
||||||
import tiktoken
|
import tiktoken
|
||||||
from fastapi import APIRouter, BackgroundTasks, Depends, File, Form, Query, UploadFile
|
from fastapi import APIRouter, BackgroundTasks, Depends, File, Form, Query, UploadFile
|
||||||
|
|
@ -479,7 +479,7 @@ async def list_documents(
|
||||||
"/documents",
|
"/documents",
|
||||||
operation_id="uploadDocument",
|
operation_id="uploadDocument",
|
||||||
summary="Upload/import document",
|
summary="Upload/import document",
|
||||||
description="[AC-ASA-01, AC-AISVC-63] Upload document to specified knowledge base and trigger indexing job.",
|
description="[AC-ASA-01, AC-AISVC-63, AC-IDSMETA-15] Upload document to specified knowledge base and trigger indexing job.",
|
||||||
responses={
|
responses={
|
||||||
202: {"description": "Accepted - async indexing job started"},
|
202: {"description": "Accepted - async indexing job started"},
|
||||||
400: {"description": "Bad Request - unsupported format or invalid kb_id"},
|
400: {"description": "Bad Request - unsupported format or invalid kb_id"},
|
||||||
|
|
@ -493,17 +493,28 @@ async def upload_document(
|
||||||
background_tasks: BackgroundTasks,
|
background_tasks: BackgroundTasks,
|
||||||
file: UploadFile = File(...),
|
file: UploadFile = File(...),
|
||||||
kb_id: str = Form(...),
|
kb_id: str = Form(...),
|
||||||
|
metadata: str = Form(default="{}", description="元数据 JSON 字符串,根据元数据模式配置动态字段"),
|
||||||
) -> JSONResponse:
|
) -> JSONResponse:
|
||||||
"""
|
"""
|
||||||
[AC-ASA-01, AC-AISVC-63] Upload document to specified knowledge base.
|
[AC-ASA-01, AC-AISVC-63, AC-IDSMETA-15] Upload document to specified knowledge base.
|
||||||
Creates KB if not exists, indexes to corresponding Qdrant Collection.
|
Creates KB if not exists, indexes to corresponding Qdrant Collection.
|
||||||
|
|
||||||
|
[AC-IDSMETA-15] 支持动态元数据校验:
|
||||||
|
- metadata: JSON 格式的元数据,字段根据元数据模式配置
|
||||||
|
- 根据 scope=kb_document 的字段定义进行 required/type/enum 校验
|
||||||
|
|
||||||
|
示例 metadata:
|
||||||
|
- 教育行业: {"grade": "初一", "subject": "语文", "type": "痛点"}
|
||||||
|
- 医疗行业: {"department": "内科", "disease_type": "慢性病", "content_type": "科普"}
|
||||||
"""
|
"""
|
||||||
|
import json
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from app.services.document import get_supported_document_formats
|
from app.services.document import get_supported_document_formats
|
||||||
|
from app.services.metadata_field_definition_service import MetadataFieldDefinitionService
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
f"[AC-AISVC-63] Uploading document: tenant={tenant_id}, "
|
f"[AC-IDSMETA-15] Uploading document: tenant={tenant_id}, "
|
||||||
f"kb_id={kb_id}, filename={file.filename}"
|
f"kb_id={kb_id}, filename={file.filename}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -522,6 +533,36 @@ async def upload_document(
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
metadata_dict = json.loads(metadata) if metadata else {}
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
return JSONResponse(
|
||||||
|
status_code=400,
|
||||||
|
content={
|
||||||
|
"code": "INVALID_METADATA",
|
||||||
|
"message": "Invalid JSON format for metadata",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
field_def_service = MetadataFieldDefinitionService(session)
|
||||||
|
|
||||||
|
is_valid, validation_errors = await field_def_service.validate_metadata_for_create(
|
||||||
|
tenant_id, metadata_dict, "kb_document"
|
||||||
|
)
|
||||||
|
|
||||||
|
if not is_valid:
|
||||||
|
logger.warning(f"[AC-IDSMETA-15] Metadata validation failed: {validation_errors}")
|
||||||
|
return JSONResponse(
|
||||||
|
status_code=400,
|
||||||
|
content={
|
||||||
|
"code": "METADATA_VALIDATION_ERROR",
|
||||||
|
"message": "Metadata validation failed",
|
||||||
|
"details": {
|
||||||
|
"errors": validation_errors,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
kb_service = KnowledgeBaseService(session)
|
kb_service = KnowledgeBaseService(session)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
@ -529,7 +570,7 @@ async def upload_document(
|
||||||
if not kb:
|
if not kb:
|
||||||
kb = await kb_service.get_or_create_default_kb(tenant_id)
|
kb = await kb_service.get_or_create_default_kb(tenant_id)
|
||||||
kb_id = str(kb.id)
|
kb_id = str(kb.id)
|
||||||
logger.info(f"[AC-AISVC-63] KB not found, using default: {kb_id}")
|
logger.info(f"[AC-IDSMETA-15] KB not found, using default: {kb_id}")
|
||||||
else:
|
else:
|
||||||
kb_id = str(kb.id)
|
kb_id = str(kb.id)
|
||||||
except Exception:
|
except Exception:
|
||||||
|
|
@ -550,7 +591,7 @@ async def upload_document(
|
||||||
await session.commit()
|
await session.commit()
|
||||||
|
|
||||||
background_tasks.add_task(
|
background_tasks.add_task(
|
||||||
_index_document, tenant_id, kb_id, str(job.id), str(document.id), file_content, file.filename
|
_index_document, tenant_id, kb_id, str(job.id), str(document.id), file_content, file.filename, metadata_dict
|
||||||
)
|
)
|
||||||
|
|
||||||
return JSONResponse(
|
return JSONResponse(
|
||||||
|
|
@ -560,6 +601,7 @@ async def upload_document(
|
||||||
"docId": str(document.id),
|
"docId": str(document.id),
|
||||||
"kbId": kb_id,
|
"kbId": kb_id,
|
||||||
"status": job.status,
|
"status": job.status,
|
||||||
|
"metadata": metadata_dict,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -571,11 +613,15 @@ async def _index_document(
|
||||||
doc_id: str,
|
doc_id: str,
|
||||||
content: bytes,
|
content: bytes,
|
||||||
filename: str | None = None,
|
filename: str | None = None,
|
||||||
|
metadata: dict[str, Any] | None = None,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Background indexing task.
|
Background indexing task.
|
||||||
[AC-AISVC-33, AC-AISVC-34, AC-AISVC-35, AC-AISVC-63] Uses document parsing and pluggable embedding.
|
[AC-AISVC-33, AC-AISVC-34, AC-AISVC-35, AC-AISVC-63] Uses document parsing and pluggable embedding.
|
||||||
Indexes to the specified knowledge base's Qdrant Collection.
|
Indexes to the specified knowledge base's Qdrant Collection.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
metadata: 动态元数据,字段根据元数据模式配置
|
||||||
"""
|
"""
|
||||||
import asyncio
|
import asyncio
|
||||||
import tempfile
|
import tempfile
|
||||||
|
|
@ -704,6 +750,10 @@ async def _index_document(
|
||||||
|
|
||||||
points = []
|
points = []
|
||||||
total_chunks = len(all_chunks)
|
total_chunks = len(all_chunks)
|
||||||
|
|
||||||
|
doc_metadata = metadata or {}
|
||||||
|
logger.info(f"[INDEX] Document metadata: {doc_metadata}")
|
||||||
|
|
||||||
for i, chunk in enumerate(all_chunks):
|
for i, chunk in enumerate(all_chunks):
|
||||||
payload = {
|
payload = {
|
||||||
"text": chunk.text,
|
"text": chunk.text,
|
||||||
|
|
@ -712,6 +762,7 @@ async def _index_document(
|
||||||
"chunk_index": i,
|
"chunk_index": i,
|
||||||
"start_token": chunk.start_token,
|
"start_token": chunk.start_token,
|
||||||
"end_token": chunk.end_token,
|
"end_token": chunk.end_token,
|
||||||
|
"metadata": doc_metadata,
|
||||||
}
|
}
|
||||||
if chunk.page is not None:
|
if chunk.page is not None:
|
||||||
payload["page"] = chunk.page
|
payload["page"] = chunk.page
|
||||||
|
|
|
||||||
|
|
@ -42,6 +42,7 @@ class ScriptFlowService:
|
||||||
) -> ScriptFlow:
|
) -> ScriptFlow:
|
||||||
"""
|
"""
|
||||||
[AC-AISVC-71] Create a new script flow with steps.
|
[AC-AISVC-71] Create a new script flow with steps.
|
||||||
|
[AC-IDSMETA-16] Support metadata field.
|
||||||
"""
|
"""
|
||||||
self._validate_steps(create_data.steps)
|
self._validate_steps(create_data.steps)
|
||||||
|
|
||||||
|
|
@ -51,12 +52,13 @@ class ScriptFlowService:
|
||||||
description=create_data.description,
|
description=create_data.description,
|
||||||
steps=create_data.steps,
|
steps=create_data.steps,
|
||||||
is_enabled=create_data.is_enabled,
|
is_enabled=create_data.is_enabled,
|
||||||
|
metadata_=create_data.metadata_,
|
||||||
)
|
)
|
||||||
self._session.add(flow)
|
self._session.add(flow)
|
||||||
await self._session.flush()
|
await self._session.flush()
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
f"[AC-AISVC-71] Created script flow: tenant={tenant_id}, "
|
f"[AC-AISVC-71][AC-IDSMETA-16] Created script flow: tenant={tenant_id}, "
|
||||||
f"id={flow.id}, name={flow.name}, steps={len(flow.steps)}"
|
f"id={flow.id}, name={flow.name}, steps={len(flow.steps)}"
|
||||||
)
|
)
|
||||||
return flow
|
return flow
|
||||||
|
|
@ -102,6 +104,7 @@ class ScriptFlowService:
|
||||||
) -> dict[str, Any] | None:
|
) -> dict[str, Any] | None:
|
||||||
"""
|
"""
|
||||||
[AC-AISVC-73] Get flow detail with complete step definitions.
|
[AC-AISVC-73] Get flow detail with complete step definitions.
|
||||||
|
[AC-IDSMETA-16] Include metadata field.
|
||||||
"""
|
"""
|
||||||
flow = await self.get_flow(tenant_id, flow_id)
|
flow = await self.get_flow(tenant_id, flow_id)
|
||||||
if not flow:
|
if not flow:
|
||||||
|
|
@ -117,6 +120,7 @@ class ScriptFlowService:
|
||||||
"is_enabled": flow.is_enabled,
|
"is_enabled": flow.is_enabled,
|
||||||
"step_count": len(flow.steps),
|
"step_count": len(flow.steps),
|
||||||
"linked_rule_count": linked_rule_count,
|
"linked_rule_count": linked_rule_count,
|
||||||
|
"metadata": flow.metadata_,
|
||||||
"created_at": flow.created_at.isoformat(),
|
"created_at": flow.created_at.isoformat(),
|
||||||
"updated_at": flow.updated_at.isoformat(),
|
"updated_at": flow.updated_at.isoformat(),
|
||||||
}
|
}
|
||||||
|
|
@ -129,6 +133,7 @@ class ScriptFlowService:
|
||||||
) -> ScriptFlow | None:
|
) -> ScriptFlow | None:
|
||||||
"""
|
"""
|
||||||
[AC-AISVC-73] Update flow definition.
|
[AC-AISVC-73] Update flow definition.
|
||||||
|
[AC-IDSMETA-16] Support metadata field.
|
||||||
"""
|
"""
|
||||||
flow = await self.get_flow(tenant_id, flow_id)
|
flow = await self.get_flow(tenant_id, flow_id)
|
||||||
if not flow:
|
if not flow:
|
||||||
|
|
@ -143,12 +148,14 @@ class ScriptFlowService:
|
||||||
flow.steps = update_data.steps
|
flow.steps = update_data.steps
|
||||||
if update_data.is_enabled is not None:
|
if update_data.is_enabled is not None:
|
||||||
flow.is_enabled = update_data.is_enabled
|
flow.is_enabled = update_data.is_enabled
|
||||||
|
if update_data.metadata_ is not None:
|
||||||
|
flow.metadata_ = update_data.metadata_
|
||||||
flow.updated_at = datetime.utcnow()
|
flow.updated_at = datetime.utcnow()
|
||||||
|
|
||||||
await self._session.flush()
|
await self._session.flush()
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
f"[AC-AISVC-73] Updated script flow: tenant={tenant_id}, id={flow_id}"
|
f"[AC-AISVC-73][AC-IDSMETA-16] Updated script flow: tenant={tenant_id}, id={flow_id}"
|
||||||
)
|
)
|
||||||
return flow
|
return flow
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -83,6 +83,7 @@ class IntentRuleService:
|
||||||
) -> IntentRule:
|
) -> IntentRule:
|
||||||
"""
|
"""
|
||||||
[AC-AISVC-65] Create a new intent rule.
|
[AC-AISVC-65] Create a new intent rule.
|
||||||
|
[AC-IDSMETA-16] Support metadata field.
|
||||||
"""
|
"""
|
||||||
flow_id_uuid = None
|
flow_id_uuid = None
|
||||||
if create_data.flow_id:
|
if create_data.flow_id:
|
||||||
|
|
@ -104,6 +105,7 @@ class IntentRuleService:
|
||||||
transfer_message=create_data.transfer_message,
|
transfer_message=create_data.transfer_message,
|
||||||
is_enabled=True,
|
is_enabled=True,
|
||||||
hit_count=0,
|
hit_count=0,
|
||||||
|
metadata_=create_data.metadata_,
|
||||||
)
|
)
|
||||||
self._session.add(rule)
|
self._session.add(rule)
|
||||||
await self._session.flush()
|
await self._session.flush()
|
||||||
|
|
@ -111,7 +113,7 @@ class IntentRuleService:
|
||||||
self._cache.invalidate(tenant_id)
|
self._cache.invalidate(tenant_id)
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
f"[AC-AISVC-65] Created intent rule: tenant={tenant_id}, "
|
f"[AC-AISVC-65][AC-IDSMETA-16] Created intent rule: tenant={tenant_id}, "
|
||||||
f"id={rule.id}, name={rule.name}, response_type={rule.response_type}"
|
f"id={rule.id}, name={rule.name}, response_type={rule.response_type}"
|
||||||
)
|
)
|
||||||
return rule
|
return rule
|
||||||
|
|
@ -162,6 +164,7 @@ class IntentRuleService:
|
||||||
) -> IntentRule | None:
|
) -> IntentRule | None:
|
||||||
"""
|
"""
|
||||||
[AC-AISVC-67] Update an intent rule.
|
[AC-AISVC-67] Update an intent rule.
|
||||||
|
[AC-IDSMETA-16] Support metadata field.
|
||||||
"""
|
"""
|
||||||
rule = await self.get_rule(tenant_id, rule_id)
|
rule = await self.get_rule(tenant_id, rule_id)
|
||||||
if not rule:
|
if not rule:
|
||||||
|
|
@ -190,6 +193,8 @@ class IntentRuleService:
|
||||||
rule.transfer_message = update_data.transfer_message
|
rule.transfer_message = update_data.transfer_message
|
||||||
if update_data.is_enabled is not None:
|
if update_data.is_enabled is not None:
|
||||||
rule.is_enabled = update_data.is_enabled
|
rule.is_enabled = update_data.is_enabled
|
||||||
|
if update_data.metadata_ is not None:
|
||||||
|
rule.metadata_ = update_data.metadata_
|
||||||
|
|
||||||
rule.updated_at = datetime.utcnow()
|
rule.updated_at = datetime.utcnow()
|
||||||
await self._session.flush()
|
await self._session.flush()
|
||||||
|
|
@ -197,7 +202,7 @@ class IntentRuleService:
|
||||||
self._cache.invalidate(tenant_id)
|
self._cache.invalidate(tenant_id)
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
f"[AC-AISVC-67] Updated intent rule: tenant={tenant_id}, id={rule_id}"
|
f"[AC-AISVC-67][AC-IDSMETA-16] Updated intent rule: tenant={tenant_id}, id={rule_id}"
|
||||||
)
|
)
|
||||||
return rule
|
return rule
|
||||||
|
|
||||||
|
|
@ -294,6 +299,7 @@ class IntentRuleService:
|
||||||
"transfer_message": rule.transfer_message,
|
"transfer_message": rule.transfer_message,
|
||||||
"is_enabled": rule.is_enabled,
|
"is_enabled": rule.is_enabled,
|
||||||
"hit_count": rule.hit_count,
|
"hit_count": rule.hit_count,
|
||||||
|
"metadata": rule.metadata_,
|
||||||
"created_at": rule.created_at.isoformat(),
|
"created_at": rule.created_at.isoformat(),
|
||||||
"updated_at": rule.updated_at.isoformat(),
|
"updated_at": rule.updated_at.isoformat(),
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -95,6 +95,7 @@ class PromptTemplateService:
|
||||||
) -> PromptTemplate:
|
) -> PromptTemplate:
|
||||||
"""
|
"""
|
||||||
[AC-AISVC-52] Create a new prompt template with initial version.
|
[AC-AISVC-52] Create a new prompt template with initial version.
|
||||||
|
[AC-IDSMETA-16] Support metadata field.
|
||||||
"""
|
"""
|
||||||
template = PromptTemplate(
|
template = PromptTemplate(
|
||||||
tenant_id=tenant_id,
|
tenant_id=tenant_id,
|
||||||
|
|
@ -102,6 +103,7 @@ class PromptTemplateService:
|
||||||
scene=create_data.scene,
|
scene=create_data.scene,
|
||||||
description=create_data.description,
|
description=create_data.description,
|
||||||
is_default=create_data.is_default,
|
is_default=create_data.is_default,
|
||||||
|
metadata_=create_data.metadata_,
|
||||||
)
|
)
|
||||||
self._session.add(template)
|
self._session.add(template)
|
||||||
await self._session.flush()
|
await self._session.flush()
|
||||||
|
|
@ -117,7 +119,7 @@ class PromptTemplateService:
|
||||||
await self._session.flush()
|
await self._session.flush()
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
f"[AC-AISVC-52] Created prompt template: tenant={tenant_id}, "
|
f"[AC-AISVC-52][AC-IDSMETA-16] Created prompt template: tenant={tenant_id}, "
|
||||||
f"id={template.id}, name={template.name}"
|
f"id={template.id}, name={template.name}"
|
||||||
)
|
)
|
||||||
return template
|
return template
|
||||||
|
|
@ -182,6 +184,7 @@ class PromptTemplateService:
|
||||||
"scene": template.scene,
|
"scene": template.scene,
|
||||||
"description": template.description,
|
"description": template.description,
|
||||||
"is_default": template.is_default,
|
"is_default": template.is_default,
|
||||||
|
"metadata": template.metadata_,
|
||||||
"current_version": {
|
"current_version": {
|
||||||
"version": current_version.version,
|
"version": current_version.version,
|
||||||
"status": current_version.status,
|
"status": current_version.status,
|
||||||
|
|
@ -208,6 +211,7 @@ class PromptTemplateService:
|
||||||
) -> PromptTemplate | None:
|
) -> PromptTemplate | None:
|
||||||
"""
|
"""
|
||||||
[AC-AISVC-53] Update template and create a new version.
|
[AC-AISVC-53] Update template and create a new version.
|
||||||
|
[AC-IDSMETA-16] Support metadata field.
|
||||||
"""
|
"""
|
||||||
template = await self.get_template(tenant_id, template_id)
|
template = await self.get_template(tenant_id, template_id)
|
||||||
if not template:
|
if not template:
|
||||||
|
|
@ -221,6 +225,8 @@ class PromptTemplateService:
|
||||||
template.description = update_data.description
|
template.description = update_data.description
|
||||||
if update_data.is_default is not None:
|
if update_data.is_default is not None:
|
||||||
template.is_default = update_data.is_default
|
template.is_default = update_data.is_default
|
||||||
|
if update_data.metadata_ is not None:
|
||||||
|
template.metadata_ = update_data.metadata_
|
||||||
template.updated_at = datetime.utcnow()
|
template.updated_at = datetime.utcnow()
|
||||||
|
|
||||||
if update_data.system_instruction is not None:
|
if update_data.system_instruction is not None:
|
||||||
|
|
@ -241,7 +247,7 @@ class PromptTemplateService:
|
||||||
self._cache.invalidate(tenant_id, template.scene)
|
self._cache.invalidate(tenant_id, template.scene)
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
f"[AC-AISVC-53] Updated prompt template: tenant={tenant_id}, id={template_id}"
|
f"[AC-AISVC-53][AC-IDSMETA-16] Updated prompt template: tenant={tenant_id}, id={template_id}"
|
||||||
)
|
)
|
||||||
return template
|
return template
|
||||||
|
|
||||||
|
|
@ -400,11 +406,19 @@ class PromptTemplateService:
|
||||||
if not template:
|
if not template:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
versions = await self._get_versions(template_id)
|
from sqlalchemy import delete
|
||||||
for v in versions:
|
|
||||||
await self._session.delete(v)
|
|
||||||
|
|
||||||
await self._session.delete(template)
|
await self._session.execute(
|
||||||
|
delete(PromptTemplateVersion).where(
|
||||||
|
PromptTemplateVersion.template_id == template_id
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
await self._session.execute(
|
||||||
|
delete(PromptTemplate).where(
|
||||||
|
PromptTemplate.id == template_id
|
||||||
|
)
|
||||||
|
)
|
||||||
await self._session.flush()
|
await self._session.flush()
|
||||||
|
|
||||||
self._cache.invalidate(tenant_id, template.scene)
|
self._cache.invalidate(tenant_id, template.scene)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue