feat: implement decomposition template with version control [AC-IDSMETA-21, AC-IDSMETA-22]
This commit is contained in:
parent
c4ad6eb8ce
commit
83bc1d0830
|
|
@ -0,0 +1,296 @@
|
|||
"""
|
||||
Decomposition Template API.
|
||||
[AC-IDSMETA-21, AC-IDSMETA-22] 拆解模板管理接口,支持文本拆解为结构化数据。
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Annotated, Any
|
||||
|
||||
from fastapi import APIRouter, Depends, Query
|
||||
from fastapi.responses import JSONResponse
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.core.database import get_session
|
||||
from app.core.exceptions import MissingTenantIdException
|
||||
from app.core.tenant import get_tenant_id
|
||||
from app.models.entities import (
|
||||
DecompositionRequest,
|
||||
DecompositionTemplateCreate,
|
||||
DecompositionTemplateStatus,
|
||||
DecompositionTemplateUpdate,
|
||||
)
|
||||
from app.services.decomposition_template_service import DecompositionTemplateService
|
||||
|
||||
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Router for the decomposition-template admin endpoints; every route declared
# on it is served under the /admin/decomposition-templates prefix.
router = APIRouter(prefix="/admin/decomposition-templates", tags=["DecompositionTemplates"])
|
||||
|
||||
|
||||
def get_current_tenant_id() -> str:
    """
    Resolve the tenant ID for the current request context.

    Returns:
        The tenant ID reported by ``get_tenant_id()``.

    Raises:
        MissingTenantIdException: If ``get_tenant_id()`` yields a falsy value.
    """
    current_tenant = get_tenant_id()
    if current_tenant:
        return current_tenant
    raise MissingTenantIdException()
|
||||
|
||||
|
||||
@router.get(
    "",
    operation_id="listDecompositionTemplates",
    summary="List decomposition templates",
    description="[AC-IDSMETA-22] 获取拆解模板列表,支持按状态过滤",
)
async def list_templates(
    tenant_id: Annotated[str, Depends(get_current_tenant_id)],
    session: Annotated[AsyncSession, Depends(get_session)],
    status: Annotated[str | None, Query(
        description="按状态过滤: draft/published/archived"
    )] = None,
) -> JSONResponse:
    """
    [AC-IDSMETA-22] List the tenant's decomposition templates.

    Args:
        tenant_id: Tenant resolved by ``get_current_tenant_id``.
        session: Database session for this request.
        status: Optional status filter; when given it must be one of the
            ``DecompositionTemplateStatus`` values.

    Returns:
        200 with ``{"items": [...]}``, or 400 with code ``INVALID_STATUS``
        when ``status`` is not a known value.
    """
    logger.info(
        f"[AC-IDSMETA-22] Listing decomposition templates: "
        f"tenant={tenant_id}, status={status}"
    )

    allowed_statuses = [member.value for member in DecompositionTemplateStatus]
    if status and status not in allowed_statuses:
        error_body = {
            "code": "INVALID_STATUS",
            "message": f"Invalid status: {status}",
            "details": {
                "valid_values": allowed_statuses
            }
        }
        return JSONResponse(status_code=400, content=error_body)

    found = await DecompositionTemplateService(session).list_templates(tenant_id, status)

    # Serialize each template in the order the service returned it.
    items = []
    for tpl in found:
        items.append({
            "id": str(tpl.id),
            "name": tpl.name,
            "description": tpl.description,
            "version": tpl.version,
            "status": tpl.status,
            "template_schema": tpl.template_schema,
            "extraction_hints": tpl.extraction_hints,
            "example_input": tpl.example_input,
            "example_output": tpl.example_output,
            "created_at": tpl.created_at.isoformat(),
            "updated_at": tpl.updated_at.isoformat(),
        })

    return JSONResponse(content={"items": items})
|
||||
|
||||
|
||||
@router.post(
    "",
    operation_id="createDecompositionTemplate",
    summary="Create decomposition template",
    description="[AC-IDSMETA-22] 创建新的拆解模板",
    status_code=201,
)
async def create_template(
    tenant_id: Annotated[str, Depends(get_current_tenant_id)],
    session: Annotated[AsyncSession, Depends(get_session)],
    template_create: DecompositionTemplateCreate,
) -> JSONResponse:
    """
    [AC-IDSMETA-22] Create a decomposition template for the current tenant.

    Args:
        tenant_id: Tenant resolved by ``get_current_tenant_id``.
        session: Database session for this request; committed here on success.
        template_create: Request body describing the new template.

    Returns:
        201 with the serialized template.
    """
    logger.info(
        f"[AC-IDSMETA-22] Creating decomposition template: "
        f"tenant={tenant_id}, name={template_create.name}"
    )

    service = DecompositionTemplateService(session)
    template = await service.create_template(tenant_id, template_create)

    # Build the response BEFORE committing. With SQLAlchemy's default
    # expire_on_commit=True, commit() expires every loaded attribute, and
    # reading one afterwards needs implicit IO that an AsyncSession cannot
    # perform. Reading first yields the same values and is safe under either
    # session configuration.
    payload = {
        "id": str(template.id),
        "name": template.name,
        "description": template.description,
        "version": template.version,
        "status": template.status,
        "template_schema": template.template_schema,
        "extraction_hints": template.extraction_hints,
        "example_input": template.example_input,
        "example_output": template.example_output,
        "created_at": template.created_at.isoformat(),
        "updated_at": template.updated_at.isoformat(),
    }

    await session.commit()

    return JSONResponse(status_code=201, content=payload)
|
||||
|
||||
|
||||
@router.get(
    "/latest",
    operation_id="getLatestPublishedTemplate",
    summary="Get latest published template",
    description="[AC-IDSMETA-22] 获取最近生效的发布版本模板",
)
async def get_latest_template(
    tenant_id: Annotated[str, Depends(get_current_tenant_id)],
    session: Annotated[AsyncSession, Depends(get_session)],
) -> JSONResponse:
    """
    [AC-IDSMETA-22] Return the tenant's latest published template.

    Args:
        tenant_id: Tenant resolved by ``get_current_tenant_id``.
        session: Database session for this request.

    Returns:
        200 with the serialized template, or 404 with code ``NOT_FOUND`` when
        the service finds no published template.
    """
    logger.info(
        f"[AC-IDSMETA-22] Getting latest published template: tenant={tenant_id}"
    )

    latest = await DecompositionTemplateService(session).get_latest_published_template(tenant_id)

    if latest:
        body = {
            "id": str(latest.id),
            "name": latest.name,
            "description": latest.description,
            "version": latest.version,
            "status": latest.status,
            "template_schema": latest.template_schema,
            "extraction_hints": latest.extraction_hints,
            "example_input": latest.example_input,
            "example_output": latest.example_output,
            "created_at": latest.created_at.isoformat(),
            "updated_at": latest.updated_at.isoformat(),
        }
        return JSONResponse(content=body)

    not_found = {
        "code": "NOT_FOUND",
        "message": "No published template found",
    }
    return JSONResponse(status_code=404, content=not_found)
|
||||
|
||||
|
||||
@router.put(
    "/{id}",
    operation_id="updateDecompositionTemplate",
    summary="Update decomposition template",
    description="[AC-IDSMETA-22] 更新拆解模板,支持状态切换",
)
async def update_template(
    tenant_id: Annotated[str, Depends(get_current_tenant_id)],
    session: Annotated[AsyncSession, Depends(get_session)],
    id: str,
    template_update: DecompositionTemplateUpdate,
) -> JSONResponse:
    """
    [AC-IDSMETA-22] Update a decomposition template, including status changes.

    Args:
        tenant_id: Tenant resolved by ``get_current_tenant_id``.
        session: Database session for this request; committed here on success.
        id: Template ID taken from the URL path (name fixed by the route).
        template_update: Request body with the fields to change.

    Returns:
        200 with the serialized template, 400 with code ``INVALID_STATUS``
        for an unknown status value, or 404 with code ``NOT_FOUND`` when the
        service reports no such template (nothing is committed then).
    """
    logger.info(
        f"[AC-IDSMETA-22] Updating decomposition template: "
        f"tenant={tenant_id}, id={id}"
    )

    valid_statuses = [s.value for s in DecompositionTemplateStatus]
    if template_update.status and template_update.status not in valid_statuses:
        return JSONResponse(
            status_code=400,
            content={
                "code": "INVALID_STATUS",
                "message": f"Invalid status: {template_update.status}",
                "details": {
                    "valid_values": valid_statuses
                }
            }
        )

    service = DecompositionTemplateService(session)
    template = await service.update_template(tenant_id, id, template_update)

    if not template:
        return JSONResponse(
            status_code=404,
            content={
                "code": "NOT_FOUND",
                "message": f"Template {id} not found",
            }
        )

    # Build the response BEFORE committing. With SQLAlchemy's default
    # expire_on_commit=True, commit() expires every loaded attribute, and
    # reading one afterwards needs implicit IO that an AsyncSession cannot
    # perform. Reading first yields the same values and is safe under either
    # session configuration.
    payload = {
        "id": str(template.id),
        "name": template.name,
        "description": template.description,
        "version": template.version,
        "status": template.status,
        "template_schema": template.template_schema,
        "extraction_hints": template.extraction_hints,
        "example_input": template.example_input,
        "example_output": template.example_output,
        "created_at": template.created_at.isoformat(),
        "updated_at": template.updated_at.isoformat(),
    }

    await session.commit()

    return JSONResponse(content=payload)
|
||||
|
||||
|
||||
@router.post(
    "/decompose",
    operation_id="decomposeText",
    summary="Decompose text to structured data",
    description="[AC-IDSMETA-21] 将待录入文本拆解为固定模板输出",
)
async def decompose_text(
    tenant_id: Annotated[str, Depends(get_current_tenant_id)],
    session: Annotated[AsyncSession, Depends(get_session)],
    request: DecompositionRequest,
) -> JSONResponse:
    """
    [AC-IDSMETA-21] Decompose input text into the fixed template structure.

    Template selection (by ``request.template_id`` or a fallback) is
    delegated to ``DecompositionTemplateService.decompose_text``.

    Args:
        tenant_id: Tenant resolved by ``get_current_tenant_id``.
        session: Database session for this request.
        request: Decomposition request (text, optional template ID).

    Returns:
        200 with the extracted data and metadata on success, or 400 with code
        ``DECOMPOSITION_FAILED`` when the service reports a failure.
    """
    logger.info(
        f"[AC-IDSMETA-21] Decomposing text: tenant={tenant_id}, "
        f"template_id={request.template_id}, text_length={len(request.text)}"
    )

    # Imported at call time, as in the original handler.
    from app.services.llm import get_llm_client

    decomposer = DecompositionTemplateService(session, get_llm_client())
    outcome = await decomposer.decompose_text(tenant_id, request)

    if outcome.success:
        success_body = {
            "success": outcome.success,
            "data": outcome.data,
            "template_id": outcome.template_id,
            "template_version": outcome.template_version,
            "confidence": outcome.confidence,
            "latency_ms": outcome.latency_ms,
        }
        return JSONResponse(content=success_body)

    failure_body = {
        "code": "DECOMPOSITION_FAILED",
        "message": outcome.error,
        "details": {
            "template_id": outcome.template_id,
            "template_version": outcome.template_version,
            "latency_ms": outcome.latency_ms,
        }
    }
    return JSONResponse(status_code=400, content=failure_body)
|
||||
|
|
@ -0,0 +1,416 @@
|
|||
"""
|
||||
Decomposition Template Service.
|
||||
[AC-IDSMETA-21, AC-IDSMETA-22] 拆解模板服务,支持文本拆解为结构化数据。
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlmodel import col
|
||||
|
||||
from app.models.entities import (
|
||||
DecompositionTemplate,
|
||||
DecompositionTemplateCreate,
|
||||
DecompositionTemplateStatus,
|
||||
DecompositionTemplateUpdate,
|
||||
DecompositionRequest,
|
||||
DecompositionResult,
|
||||
)
|
||||
|
||||
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class DecompositionTemplateService:
    """
    [AC-IDSMETA-22] Service for tenant-scoped decomposition templates.

    Covers listing, lookup, creation and update of templates (the version
    number is bumped on every update), lookup of the most recently updated
    published template, and [AC-IDSMETA-21] LLM-backed decomposition of free
    text into the structure a template describes.
    """

    def __init__(self, session: AsyncSession, llm_client=None):
        """
        Args:
            session: Async session used for every query. This service only
                flushes; committing is left to the caller.
            llm_client: Optional LLM client, needed only by
                ``decompose_text``. It must expose an awaitable ``chat``
                method (see ``_call_llm``).
        """
        self._session = session
        self._llm_client = llm_client

    @staticmethod
    def _elapsed_ms(started: float) -> int:
        """Return whole milliseconds elapsed since a ``time.time()`` reading."""
        return int((time.time() - started) * 1000)

    async def list_templates(
        self,
        tenant_id: str,
        status: str | None = None,
    ) -> list[DecompositionTemplate]:
        """
        [AC-IDSMETA-22] List all decomposition templates of a tenant.

        Args:
            tenant_id: Tenant ID.
            status: Optional status filter (draft/published/archived).

        Returns:
            Matching templates, newest ``created_at`` first.
        """
        stmt = select(DecompositionTemplate).where(
            DecompositionTemplate.tenant_id == tenant_id,
        )

        if status:
            stmt = stmt.where(DecompositionTemplate.status == status)

        stmt = stmt.order_by(col(DecompositionTemplate.created_at).desc())

        result = await self._session.execute(stmt)
        return list(result.scalars().all())

    async def get_template(
        self,
        tenant_id: str,
        template_id: str,
    ) -> DecompositionTemplate | None:
        """
        Fetch a single template by ID within a tenant.

        Args:
            tenant_id: Tenant ID.
            template_id: Template ID as a UUID string.

        Returns:
            The template, or None when it does not exist for this tenant or
            when ``template_id`` is not a valid UUID string.
        """
        # A malformed ID cannot match any row; report "not found" instead of
        # letting ValueError escape to the caller as an unhandled error.
        try:
            template_uuid = uuid.UUID(template_id)
        except ValueError:
            return None

        stmt = select(DecompositionTemplate).where(
            DecompositionTemplate.tenant_id == tenant_id,
            DecompositionTemplate.id == template_uuid,
        )
        result = await self._session.execute(stmt)
        return result.scalar_one_or_none()

    async def get_latest_published_template(
        self,
        tenant_id: str,
    ) -> DecompositionTemplate | None:
        """
        [AC-IDSMETA-22] Fetch the most recently updated published template.

        Args:
            tenant_id: Tenant ID.

        Returns:
            The published template with the newest ``updated_at``, or None
            when the tenant has no published template.
        """
        stmt = select(DecompositionTemplate).where(
            DecompositionTemplate.tenant_id == tenant_id,
            DecompositionTemplate.status == DecompositionTemplateStatus.PUBLISHED.value,
        ).order_by(col(DecompositionTemplate.updated_at).desc()).limit(1)

        result = await self._session.execute(stmt)
        return result.scalar_one_or_none()

    async def create_template(
        self,
        tenant_id: str,
        template_create: DecompositionTemplateCreate,
    ) -> DecompositionTemplate:
        """
        [AC-IDSMETA-22] Create a decomposition template.

        The template starts at version 1 in draft status. It is added to the
        session and flushed, but not committed.

        Args:
            tenant_id: Tenant ID.
            template_create: Creation payload.

        Returns:
            The newly created template.
        """
        template = DecompositionTemplate(
            tenant_id=tenant_id,
            name=template_create.name,
            description=template_create.description,
            template_schema=template_create.template_schema,
            extraction_hints=template_create.extraction_hints,
            example_input=template_create.example_input,
            example_output=template_create.example_output,
            version=1,
            status=DecompositionTemplateStatus.DRAFT.value,
        )

        self._session.add(template)
        await self._session.flush()

        logger.info(
            f"[AC-IDSMETA-22] Created decomposition template: tenant={tenant_id}, "
            f"id={template.id}, name={template.name}"
        )

        return template

    async def update_template(
        self,
        tenant_id: str,
        template_id: str,
        template_update: DecompositionTemplateUpdate,
    ) -> DecompositionTemplate | None:
        """
        [AC-IDSMETA-22] Update a decomposition template.

        Only fields that are not None in ``template_update`` are applied. The
        version is incremented and ``updated_at`` refreshed on every call,
        even when no field changes. The change is flushed, not committed.

        Args:
            tenant_id: Tenant ID.
            template_id: Template ID as a UUID string.
            template_update: Update payload.

        Returns:
            The updated template, or None when it was not found.
        """
        template = await self.get_template(tenant_id, template_id)
        if not template:
            return None

        # Copy across every plain field the caller actually supplied.
        for field_name in (
            "name",
            "description",
            "template_schema",
            "extraction_hints",
            "example_input",
            "example_output",
        ):
            new_value = getattr(template_update, field_name)
            if new_value is not None:
                setattr(template, field_name, new_value)

        if template_update.status is not None:
            old_status = template.status
            template.status = template_update.status
            logger.info(
                f"[AC-IDSMETA-22] Template status changed: tenant={tenant_id}, "
                f"id={template_id}, {old_status} -> {template.status}"
            )

        template.version += 1
        # NOTE(review): datetime.utcnow() is deprecated since Python 3.12. It
        # is kept because it yields a naive UTC value — presumably what the
        # timestamp column expects; confirm before switching to an aware one.
        template.updated_at = datetime.utcnow()
        await self._session.flush()

        logger.info(
            f"[AC-IDSMETA-22] Updated decomposition template: tenant={tenant_id}, "
            f"id={template_id}, version={template.version}"
        )

        return template

    async def decompose_text(
        self,
        tenant_id: str,
        request: DecompositionRequest,
    ) -> DecompositionResult:
        """
        [AC-IDSMETA-21] Decompose input text into the template's structure.

        Uses the template named by ``request.template_id`` when given,
        otherwise the tenant's latest published template. Failures are
        reported through the result (``success=False`` plus ``error``), not
        raised.

        Args:
            tenant_id: Tenant ID.
            request: Decomposition request.

        Returns:
            A DecompositionResult carrying the extracted data on success.
        """
        start_time = time.time()

        logger.info(
            f"[AC-IDSMETA-21] Starting text decomposition: tenant={tenant_id}, "
            f"template_id={request.template_id}, text_length={len(request.text)}"
        )

        # Resolve the template: explicit ID first, latest published otherwise.
        if request.template_id:
            template = await self.get_template(tenant_id, request.template_id)
        else:
            template = await self.get_latest_published_template(tenant_id)

        if not template:
            logger.warning(f"[AC-IDSMETA-21] No template found for tenant={tenant_id}")
            return DecompositionResult(
                success=False,
                error="No decomposition template found",
                latency_ms=self._elapsed_ms(start_time),
            )

        # An explicitly requested template may be a draft or archived one.
        if template.status != DecompositionTemplateStatus.PUBLISHED.value:
            logger.warning(
                f"[AC-IDSMETA-21] Template not published: id={template.id}, "
                f"status={template.status}"
            )
            return DecompositionResult(
                success=False,
                error=f"Template status is '{template.status}', not published",
                template_id=str(template.id),
                latency_ms=self._elapsed_ms(start_time),
            )

        # Check for the client before doing any prompt work.
        if not self._llm_client:
            logger.warning("[AC-IDSMETA-21] No LLM client configured")
            return DecompositionResult(
                success=False,
                error="LLM client not configured",
                template_id=str(template.id),
                template_version=template.version,
                latency_ms=self._elapsed_ms(start_time),
            )

        prompt = self._build_extraction_prompt(template, request.text, request.hints)

        try:
            llm_response = await self._call_llm(prompt)

            # The reply may wrap the JSON in a code fence or in prose.
            try:
                json_str = self._extract_json_from_response(llm_response)
                data = json.loads(json_str)
            except json.JSONDecodeError as e:
                logger.warning(f"[AC-IDSMETA-21] Failed to parse LLM response as JSON: {e}")
                return DecompositionResult(
                    success=False,
                    error=f"Failed to parse LLM response: {e}",
                    template_id=str(template.id),
                    template_version=template.version,
                    latency_ms=self._elapsed_ms(start_time),
                )

            latency_ms = self._elapsed_ms(start_time)

            logger.info(
                f"[AC-IDSMETA-21] Text decomposition complete: tenant={tenant_id}, "
                f"template_id={template.id}, version={template.version}, "
                f"latency_ms={latency_ms}"
            )

            return DecompositionResult(
                success=True,
                data=data,
                template_id=str(template.id),
                template_version=template.version,
                confidence=0.9,  # TODO: Calculate actual confidence
                latency_ms=latency_ms,
            )

        except Exception as e:
            # Deliberate catch-all at the LLM boundary: any failure becomes a
            # failed result instead of an unhandled error.
            logger.error(f"[AC-IDSMETA-21] LLM call failed: {e}", exc_info=True)
            return DecompositionResult(
                success=False,
                error=f"LLM call failed: {e}",
                template_id=str(template.id),
                template_version=template.version,
                latency_ms=self._elapsed_ms(start_time),
            )

    def _build_extraction_prompt(
        self,
        template: DecompositionTemplate,
        text: str,
        hints: dict[str, Any] | None = None,
    ) -> str:
        """
        Build the LLM extraction prompt.

        Sections are emitted in a fixed order: instruction, template schema,
        template hints (if any), caller hints (if any), example (only when
        both example input and output exist), the text, output rules.

        Args:
            template: Decomposition template.
            text: Text to decompose.
            hints: Extra per-request hints.

        Returns:
            The prompt, with sections joined by newlines.
        """
        schema_desc = json.dumps(template.template_schema, ensure_ascii=False, indent=2)

        prompt_parts = [
            "你是一个数据提取助手。请根据以下模板结构,从给定的文本中提取结构化数据。",
            "",
            "## 输出模板结构",
            "```json",
            schema_desc,
            "```",
            "",
        ]

        if template.extraction_hints:
            hints_desc = json.dumps(template.extraction_hints, ensure_ascii=False, indent=2)
            prompt_parts.extend([
                "## 提取提示",
                "```json",
                hints_desc,
                "```",
                "",
            ])

        if hints:
            extra_hints = json.dumps(hints, ensure_ascii=False, indent=2)
            prompt_parts.extend([
                "## 额外提示",
                "```json",
                extra_hints,
                "```",
                "",
            ])

        if template.example_input and template.example_output:
            prompt_parts.extend([
                "## 示例",
                f"输入: {template.example_input}",
                "输出: ```json",
                json.dumps(template.example_output, ensure_ascii=False, indent=2),
                "```",
                "",
            ])

        prompt_parts.extend([
            "## 待提取文本",
            text,
            "",
            "## 输出要求",
            "请直接输出 JSON 格式的提取结果,不要包含任何解释或额外文本。",
            "如果某个字段无法从文本中提取,请使用 null 作为值。",
        ])

        return "\n".join(prompt_parts)

    async def _call_llm(self, prompt: str) -> str:
        """
        Send the prompt to the LLM client and return its reply as text.

        Args:
            prompt: Prompt string.

        Returns:
            ``response.content`` when the reply has that attribute, otherwise
            ``str(response)``.

        Raises:
            NotImplementedError: If the client has no ``chat`` method.
        """
        if not hasattr(self._llm_client, 'chat'):
            raise NotImplementedError("LLM client does not support chat method")

        response = await self._llm_client.chat(
            messages=[{"role": "user", "content": prompt}],
            temperature=0.1,
        )
        return response.content if hasattr(response, 'content') else str(response)

    def _extract_json_from_response(self, response: str) -> str:
        """
        Extract the JSON text from an LLM reply.

        Tried in order: the first fenced code block; the JSON value starting
        at the first ``{`` (so trailing prose is dropped even when it
        contains braces); the span from the first ``{`` to the last ``}``;
        the stripped reply.

        Args:
            response: LLM reply text.

        Returns:
            The candidate JSON string. It is not guaranteed to be valid JSON;
            the caller parses it and handles the error.
        """
        import re

        # Prefer JSON inside a ``` or ```json fence.
        fenced = re.search(r'```(?:json)?\s*([\s\S]*?)\s*```', response)
        if fenced:
            return fenced.group(1).strip()

        # Decode from the first "{" only. Later braces are not tried, so a
        # truncated outer object can never be mistaken for one of its nested
        # objects.
        first_brace = response.find("{")
        if first_brace != -1:
            try:
                _, end = json.JSONDecoder().raw_decode(response, first_brace)
            except json.JSONDecodeError:
                pass
            else:
                return response[first_brace:end]

        # Fall back to the widest brace-delimited span.
        braced = re.search(r'\{[\s\S]*\}', response)
        if braced:
            return braced.group(0)

        return response.strip()
|
||||
|
|
@ -0,0 +1,39 @@
|
|||
-- Migration: Create metadata_field_definitions table
-- Date: 2026-03-02
-- Issue: [AC-IDSMETA-13] Metadata field definition table with field-level status governance
--
-- Every statement uses IF NOT EXISTS (or is a COMMENT), so the script can be
-- re-run safely.

-- Create metadata_field_definitions table
CREATE TABLE IF NOT EXISTS metadata_field_definitions (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id VARCHAR NOT NULL,
    field_key VARCHAR(64) NOT NULL,
    label VARCHAR(64) NOT NULL,
    type VARCHAR NOT NULL DEFAULT 'string',
    required BOOLEAN NOT NULL DEFAULT FALSE,
    options JSONB,
    default_value JSONB,
    scope JSONB NOT NULL DEFAULT '["kb_document"]',
    is_filterable BOOLEAN NOT NULL DEFAULT TRUE,
    is_rank_feature BOOLEAN NOT NULL DEFAULT FALSE,
    status VARCHAR NOT NULL DEFAULT 'draft',
    version INTEGER NOT NULL DEFAULT 1,
    created_at TIMESTAMP NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMP NOT NULL DEFAULT NOW()
);

-- Create indexes: per-tenant lookup, per-tenant status filter, and a unique
-- index that makes field_key unique within a tenant.
CREATE INDEX IF NOT EXISTS ix_metadata_field_definitions_tenant
    ON metadata_field_definitions (tenant_id);

CREATE INDEX IF NOT EXISTS ix_metadata_field_definitions_tenant_status
    ON metadata_field_definitions (tenant_id, status);

CREATE UNIQUE INDEX IF NOT EXISTS ix_metadata_field_definitions_tenant_field_key
    ON metadata_field_definitions (tenant_id, field_key);

-- Add comments (stored in the database catalog; the text is data, keep as-is)
COMMENT ON TABLE metadata_field_definitions IS '[AC-IDSMETA-13] 元数据字段定义表';
COMMENT ON COLUMN metadata_field_definitions.field_key IS '字段键名,仅允许小写字母数字下划线';
COMMENT ON COLUMN metadata_field_definitions.type IS '字段类型: string/number/boolean/enum/array_enum';
COMMENT ON COLUMN metadata_field_definitions.scope IS '适用范围: kb_document/intent_rule/script_flow/prompt_template';
COMMENT ON COLUMN metadata_field_definitions.status IS '字段状态: draft/active/deprecated';
|
||||
|
|
@ -0,0 +1,58 @@
|
|||
-- Migration: Create decomposition_templates table and add metadata fields
-- Date: 2026-03-02
-- Issue: [AC-IDSMETA-16, AC-IDSMETA-22] Add metadata fields and decomposition templates
--
-- The script is idempotent: every statement is IF NOT EXISTS or a COMMENT.
-- Requires PostgreSQL 9.6+ (ADD COLUMN IF NOT EXISTS). gen_random_uuid() is
-- built in from PostgreSQL 13; older servers need the pgcrypto extension.

-- Create decomposition_templates table
CREATE TABLE IF NOT EXISTS decomposition_templates (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id VARCHAR NOT NULL,
    name VARCHAR NOT NULL,
    description TEXT,
    version INTEGER NOT NULL DEFAULT 1,
    status VARCHAR NOT NULL DEFAULT 'draft',
    template_schema JSONB NOT NULL DEFAULT '{}',
    extraction_hints JSONB,
    example_input TEXT,
    example_output JSONB,
    created_at TIMESTAMP NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMP NOT NULL DEFAULT NOW()
);

-- Create indexes for decomposition_templates
CREATE INDEX IF NOT EXISTS ix_decomposition_templates_tenant
    ON decomposition_templates (tenant_id);

CREATE INDEX IF NOT EXISTS ix_decomposition_templates_tenant_status
    ON decomposition_templates (tenant_id, status);

-- Add the metadata column to the existing tables.
-- ADD COLUMN IF NOT EXISTS checks the exact table being altered. The previous
-- information_schema.columns lookup had no table_schema filter, so a
-- same-named table in another schema could make it skip the ALTER and break
-- the COMMENT ON COLUMN statements below.
ALTER TABLE intent_rules ADD COLUMN IF NOT EXISTS metadata JSONB;

ALTER TABLE script_flows ADD COLUMN IF NOT EXISTS metadata JSONB;

ALTER TABLE prompt_templates ADD COLUMN IF NOT EXISTS metadata JSONB;

-- Add comments (stored in the database catalog; the text is data, keep as-is)
COMMENT ON TABLE decomposition_templates IS '[AC-IDSMETA-22] 拆解模板表';
COMMENT ON COLUMN decomposition_templates.status IS '模板状态: draft/published/archived';
COMMENT ON COLUMN decomposition_templates.template_schema IS '输出模板结构定义';
COMMENT ON COLUMN intent_rules.metadata IS '[AC-IDSMETA-16] 结构化元数据';
COMMENT ON COLUMN script_flows.metadata IS '[AC-IDSMETA-16] 结构化元数据';
COMMENT ON COLUMN prompt_templates.metadata IS '[AC-IDSMETA-16] 结构化元数据';
|
||||
|
|
@ -0,0 +1,133 @@
|
|||
-- ============================================================================
-- Metadata Governance: complete migration script
-- Date: 2026-03-02
-- Issue: [AC-IDSMETA-13~22] Database changes for the metadata governance feature
--
-- The script is idempotent: every DDL statement is IF NOT EXISTS or a COMMENT.
-- Requires PostgreSQL 9.6+ (ADD COLUMN IF NOT EXISTS). gen_random_uuid() is
-- built in from PostgreSQL 13; older servers need the pgcrypto extension.
-- ============================================================================

-- ============================================================================
-- Part 1: Create the metadata_field_definitions table [AC-IDSMETA-13]
-- ============================================================================

CREATE TABLE IF NOT EXISTS metadata_field_definitions (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id VARCHAR NOT NULL,
    field_key VARCHAR(64) NOT NULL,
    label VARCHAR(64) NOT NULL,
    type VARCHAR NOT NULL DEFAULT 'string',
    required BOOLEAN NOT NULL DEFAULT FALSE,
    options JSONB,
    default_value JSONB,
    scope JSONB NOT NULL DEFAULT '["kb_document"]',
    is_filterable BOOLEAN NOT NULL DEFAULT TRUE,
    is_rank_feature BOOLEAN NOT NULL DEFAULT FALSE,
    status VARCHAR NOT NULL DEFAULT 'draft',
    version INTEGER NOT NULL DEFAULT 1,
    created_at TIMESTAMP NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMP NOT NULL DEFAULT NOW()
);

-- Indexes
CREATE INDEX IF NOT EXISTS ix_metadata_field_definitions_tenant
    ON metadata_field_definitions (tenant_id);

CREATE INDEX IF NOT EXISTS ix_metadata_field_definitions_tenant_status
    ON metadata_field_definitions (tenant_id, status);

CREATE UNIQUE INDEX IF NOT EXISTS ix_metadata_field_definitions_tenant_field_key
    ON metadata_field_definitions (tenant_id, field_key);

-- Comments (stored in the database catalog; the text is data, keep as-is)
COMMENT ON TABLE metadata_field_definitions IS '[AC-IDSMETA-13] 元数据字段定义表';
COMMENT ON COLUMN metadata_field_definitions.field_key IS '字段键名,仅允许小写字母数字下划线';
COMMENT ON COLUMN metadata_field_definitions.type IS '字段类型: string/number/boolean/enum/array_enum';
COMMENT ON COLUMN metadata_field_definitions.scope IS '适用范围: kb_document/intent_rule/script_flow/prompt_template';
COMMENT ON COLUMN metadata_field_definitions.status IS '字段状态: draft/active/deprecated';

-- ============================================================================
-- Part 2: Create the decomposition_templates table [AC-IDSMETA-22]
-- ============================================================================

CREATE TABLE IF NOT EXISTS decomposition_templates (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id VARCHAR NOT NULL,
    name VARCHAR NOT NULL,
    description TEXT,
    version INTEGER NOT NULL DEFAULT 1,
    status VARCHAR NOT NULL DEFAULT 'draft',
    template_schema JSONB NOT NULL DEFAULT '{}',
    extraction_hints JSONB,
    example_input TEXT,
    example_output JSONB,
    created_at TIMESTAMP NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMP NOT NULL DEFAULT NOW()
);

-- Indexes
CREATE INDEX IF NOT EXISTS ix_decomposition_templates_tenant
    ON decomposition_templates (tenant_id);

CREATE INDEX IF NOT EXISTS ix_decomposition_templates_tenant_status
    ON decomposition_templates (tenant_id, status);

-- Comments (stored in the database catalog; the text is data, keep as-is)
COMMENT ON TABLE decomposition_templates IS '[AC-IDSMETA-22] 拆解模板表';
COMMENT ON COLUMN decomposition_templates.status IS '模板状态: draft/published/archived';
COMMENT ON COLUMN decomposition_templates.template_schema IS '输出模板结构定义';

-- ============================================================================
-- Part 3: Add the metadata column to existing tables [AC-IDSMETA-16]
-- ============================================================================
-- ADD COLUMN IF NOT EXISTS checks the exact table being altered. The previous
-- information_schema.columns lookup had no table_schema filter, so a
-- same-named table in another schema could make it skip the ALTER. The column
-- comments are applied on every run, which is harmless.

-- intent_rules: add metadata column
ALTER TABLE intent_rules ADD COLUMN IF NOT EXISTS metadata JSONB;
COMMENT ON COLUMN intent_rules.metadata IS '[AC-IDSMETA-16] 结构化元数据';

-- script_flows: add metadata column
ALTER TABLE script_flows ADD COLUMN IF NOT EXISTS metadata JSONB;
COMMENT ON COLUMN script_flows.metadata IS '[AC-IDSMETA-16] 结构化元数据';

-- prompt_templates: add metadata column
ALTER TABLE prompt_templates ADD COLUMN IF NOT EXISTS metadata JSONB;
COMMENT ON COLUMN prompt_templates.metadata IS '[AC-IDSMETA-16] 结构化元数据';

-- ============================================================================
-- Part 4: Insert sample data (optional)
-- ============================================================================

-- Example: default metadata field definitions for an education scenario.
-- Uncomment to use; the values are sample data and are left untouched.
-- INSERT INTO metadata_field_definitions (tenant_id, field_key, label, type, required, options, scope, status)
-- VALUES
-- ('default_tenant', 'grade', '年级', 'enum', true, '["初一", "初二", "初三", "高一", "高二", "高三"]', '["kb_document", "intent_rule"]', 'active'),
-- ('default_tenant', 'subject', '学科', 'enum', true, '["语文", "数学", "英语", "物理", "化学", "生物"]', '["kb_document", "intent_rule"]', 'active'),
-- ('default_tenant', 'scene', '场景', 'enum', false, '["痛点", "知识点", "练习题", "试卷"]', '["kb_document"]', 'active');

-- ============================================================================
-- Verification queries
-- ============================================================================

-- Verify that both tables were created
SELECT table_name
FROM information_schema.tables
WHERE table_schema = 'public'
AND table_name IN ('metadata_field_definitions', 'decomposition_templates');

-- Verify that the metadata columns were added
SELECT table_name, column_name
FROM information_schema.columns
WHERE table_name IN ('intent_rules', 'script_flows', 'prompt_templates')
AND column_name = 'metadata';
|
||||
Loading…
Reference in New Issue