feat(AISVC-T7): 嵌入模型可插拔设计与文档解析支持 [AC-AISVC-29, AC-AISVC-30, AC-AISVC-31, AC-AISVC-32, AC-AISVC-33, AC-AISVC-34, AC-AISVC-35, AC-AISVC-36, AC-AISVC-37, AC-AISVC-38, AC-AISVC-39, AC-AISVC-40, AC-AISVC-41]
- 新增 EmbeddingProvider 抽象基类和工厂模式 [AC-AISVC-29, AC-AISVC-30]
- 实现 OllamaEmbeddingProvider 和 OpenAIEmbeddingProvider [AC-AISVC-29, AC-AISVC-30]
- 新增 EmbeddingConfigManager 支持配置热更新 [AC-AISVC-31, AC-AISVC-32]
- 新增 DocumentParser 抽象接口和工厂类 [AC-AISVC-33]
- 实现 PDF/Word/Excel/Text 文档解析器 [AC-AISVC-33, AC-AISVC-34, AC-AISVC-35]
- 新增嵌入管理 API 端点 [AC-AISVC-38, AC-AISVC-39, AC-AISVC-40, AC-AISVC-41]
- 更新文档上传流程支持多格式文档解析 [AC-AISVC-36, AC-AISVC-37]
- 更新 OpenAPI 契约添加嵌入管理接口
- 添加数据库初始化脚本
- 更新规范文档标记 Phase 7 完成
2026-02-24 15:08:08 +00:00
|
|
|
|
"""
|
|
|
|
|
|
Embedding provider factory and configuration manager.
|
|
|
|
|
|
[AC-AISVC-30, AC-AISVC-31] Factory pattern for dynamic provider loading.
|
|
|
|
|
|
|
|
|
|
|
|
Design reference: progress.md Section 7.1 - Architecture
|
|
|
|
|
|
- EmbeddingProviderFactory: creates providers based on config
|
|
|
|
|
|
- EmbeddingConfigManager: manages configuration with hot-reload support
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
import logging
|
|
|
|
|
|
from typing import Any, Type
|
|
|
|
|
|
|
|
|
|
|
|
from app.services.embedding.base import EmbeddingException, EmbeddingProvider
|
|
|
|
|
|
from app.services.embedding.ollama_provider import OllamaEmbeddingProvider
|
|
|
|
|
|
from app.services.embedding.openai_provider import OpenAIEmbeddingProvider
|
2026-02-25 15:10:12 +00:00
|
|
|
|
from app.services.embedding.nomic_provider import NomicEmbeddingProvider
|
feat(AISVC-T7): 嵌入模型可插拔设计与文档解析支持 [AC-AISVC-29, AC-AISVC-30, AC-AISVC-31, AC-AISVC-32, AC-AISVC-33, AC-AISVC-34, AC-AISVC-35, AC-AISVC-36, AC-AISVC-37, AC-AISVC-38, AC-AISVC-39, AC-AISVC-40, AC-AISVC-41]
- 新增 EmbeddingProvider 抽象基类和工厂模式 [AC-AISVC-29, AC-AISVC-30]
- 实现 OllamaEmbeddingProvider 和 OpenAIEmbeddingProvider [AC-AISVC-29, AC-AISVC-30]
- 新增 EmbeddingConfigManager 支持配置热更新 [AC-AISVC-31, AC-AISVC-32]
- 新增 DocumentParser 抽象接口和工厂类 [AC-AISVC-33]
- 实现 PDF/Word/Excel/Text 文档解析器 [AC-AISVC-33, AC-AISVC-34, AC-AISVC-35]
- 新增嵌入管理 API 端点 [AC-AISVC-38, AC-AISVC-39, AC-AISVC-40, AC-AISVC-41]
- 更新文档上传流程支持多格式文档解析 [AC-AISVC-36, AC-AISVC-37]
- 更新 OpenAPI 契约添加嵌入管理接口
- 添加数据库初始化脚本
- 更新规范文档标记 Phase 7 完成
2026-02-24 15:08:08 +00:00
|
|
|
|
|
|
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class EmbeddingProviderFactory:
    """
    Factory for creating embedding providers.

    [AC-AISVC-30] Supports dynamic loading based on configuration.

    Provider classes live in a class-level registry keyed by provider name.
    ``register_provider`` adds to (or overwrites an entry in) that registry
    at runtime; every other method only reads from it.
    """

    # Shared registry: provider name -> provider class.
    _providers: dict[str, Type[EmbeddingProvider]] = {
        "ollama": OllamaEmbeddingProvider,
        "openai": OpenAIEmbeddingProvider,
        "nomic": NomicEmbeddingProvider,
    }

    @classmethod
    def register_provider(cls, name: str, provider_class: Type[EmbeddingProvider]) -> None:
        """
        Register a new embedding provider.

        [AC-AISVC-30] Allows runtime registration of providers.

        Args:
            name: Identifier under which the provider is stored. An existing
                entry with the same name is replaced.
            provider_class: Provider class to instantiate for ``name``.
        """
        cls._providers[name] = provider_class
        logger.info(f"Registered embedding provider: {name}")

    @classmethod
    def get_available_providers(cls) -> list[str]:
        """
        Get list of available provider names.

        [AC-AISVC-38] Returns registered provider identifiers.

        Returns:
            A new list holding the names currently in the registry.
        """
        return list(cls._providers.keys())

    @staticmethod
    def _build_config_schema(raw_schema: dict[str, Any]) -> dict[str, Any]:
        """Convert a provider's raw field map into an object-style schema dict."""
        properties: dict[str, Any] = {}
        required: list[str] = []

        for key, field in raw_schema.items():
            prop: dict[str, Any] = {
                "type": field.get("type", "string"),
                "title": field.get("title", key),
                "description": field.get("description", ""),
                "default": field.get("default"),
            }
            # An empty/missing enum is omitted; bounds are kept even when 0.
            if field.get("enum"):
                prop["enum"] = field.get("enum")
            for bound in ("minimum", "maximum"):
                if field.get(bound) is not None:
                    prop[bound] = field.get(bound)
            properties[key] = prop

            if field.get("required"):
                required.append(key)

        config_schema: dict[str, Any] = {
            "type": "object",
            "properties": properties,
        }
        # "required" is only emitted when at least one field is required.
        if required:
            config_schema["required"] = required
        return config_schema

    @classmethod
    def get_provider_info(cls, name: str) -> dict[str, Any]:
        """
        Get provider information including config schema.

        [AC-AISVC-38] Returns provider metadata.

        Args:
            name: Registered provider identifier.

        Returns:
            Dict with ``name``, ``display_name``, ``description`` and
            ``config_schema``. Providers without a built-in display name fall
            back to ``name``; those without a description get ``""``.

        Raises:
            EmbeddingException: If ``name`` is not registered.
        """
        if name not in cls._providers:
            raise EmbeddingException(
                f"Unknown provider: {name}",
                provider="factory"
            )

        provider_class = cls._providers[name]
        # Allocate without running __init__, so the schema can be read
        # without supplying any provider configuration.
        temp_instance = provider_class.__new__(provider_class)

        display_names = {
            "ollama": "Ollama 本地模型",
            "openai": "OpenAI Embedding",
            "nomic": "Nomic Embed (优化版)",
        }

        descriptions = {
            "ollama": "使用 Ollama 运行的本地嵌入模型,支持 nomic-embed-text 等开源模型",
            "openai": "使用 OpenAI 官方 Embedding API,支持 text-embedding-3 系列模型",
            "nomic": "Nomic-embed-text v1.5 优化版,支持任务前缀和 Matryoshka 维度截断,专为RAG优化",
        }

        config_schema = cls._build_config_schema(temp_instance.get_config_schema())

        return {
            "name": name,
            "display_name": display_names.get(name, name),
            "description": descriptions.get(name, ""),
            "config_schema": config_schema,
        }

    @classmethod
    def create_provider(
        cls,
        name: str,
        config: dict[str, Any],
    ) -> EmbeddingProvider:
        """
        Create an embedding provider instance.

        [AC-AISVC-30] Creates provider based on configuration.

        Args:
            name: Provider identifier (e.g., "ollama", "openai")
            config: Provider-specific configuration, passed to the provider
                class as keyword arguments.

        Returns:
            Configured EmbeddingProvider instance

        Raises:
            EmbeddingException: If provider is unknown or configuration is invalid
        """
        if name not in cls._providers:
            raise EmbeddingException(
                f"Unknown embedding provider: {name}. "
                f"Available: {cls.get_available_providers()}",
                provider="factory"
            )

        provider_class = cls._providers[name]

        try:
            instance = provider_class(**config)
        except Exception as e:
            # NOTE(review): `config` may carry credentials (e.g. an API key);
            # confirm that exception details are never returned to clients.
            raise EmbeddingException(
                f"Failed to create provider '{name}': {e}",
                provider="factory",
                details={"config": config}
            ) from e

        logger.info(f"Created embedding provider: {name}")
        return instance
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class EmbeddingConfigManager:
    """
    Manager for embedding configuration.

    [AC-AISVC-31] Supports hot-reload of configuration.

    Holds the active provider name, its configuration dict and a lazily
    created provider instance. ``update_config`` replaces all three at once.
    """

    def __init__(self, default_provider: str = "ollama", default_config: dict[str, Any] | None = None):
        """
        Initialise the manager without creating a provider yet.

        Args:
            default_provider: Name of the provider to use initially.
            default_config: Initial provider configuration. When omitted or
                empty, a local Ollama configuration is used.
        """
        self._provider_name = default_provider
        # Keep a private copy so later caller-side mutation cannot alter the
        # active configuration (the getters below also hand out copies).
        self._config = dict(default_config) if default_config else {
            "base_url": "http://localhost:11434",
            "model": "nomic-embed-text",
            "dimension": 768,
        }
        self._provider: EmbeddingProvider | None = None

    @staticmethod
    async def _close_quietly(provider: Any) -> None:
        """Close ``provider``, logging a failure instead of raising it."""
        try:
            await provider.close()
        except Exception:
            logger.warning("Failed to close embedding provider", exc_info=True)

    def get_provider_name(self) -> str:
        """Get current provider name."""
        return self._provider_name

    def get_config(self) -> dict[str, Any]:
        """Get a shallow copy of the current configuration."""
        return self._config.copy()

    def get_full_config(self) -> dict[str, Any]:
        """
        Get full configuration including provider name.

        [AC-AISVC-39] Returns complete configuration for API response.

        Returns:
            Dict with ``provider`` (current name) and ``config`` (a shallow
            copy of the current configuration).
        """
        return {
            "provider": self._provider_name,
            "config": self._config.copy(),
        }

    async def get_provider(self) -> EmbeddingProvider:
        """
        Get or create the embedding provider.

        [AC-AISVC-29] Returns configured provider instance.

        The instance is created through the factory on first use and reused
        afterwards until ``update_config`` or ``close`` replaces or clears it.

        Raises:
            EmbeddingException: If the factory cannot create the provider.
        """
        if self._provider is None:
            self._provider = EmbeddingProviderFactory.create_provider(
                self._provider_name,
                self._config
            )
        return self._provider

    async def update_config(
        self,
        provider: str,
        config: dict[str, Any],
    ) -> bool:
        """
        Update embedding configuration.

        [AC-AISVC-31, AC-AISVC-40] Supports hot-reload.

        The new provider is built first; the manager's state is only touched
        once that succeeded, so a failed update leaves the previous provider
        and configuration in place. The previous provider is closed after
        the swap on a best-effort basis: a failure while closing it is
        logged and does not undo the update.

        Args:
            provider: New provider name
            config: New provider configuration

        Returns:
            True if update was successful

        Raises:
            EmbeddingException: If configuration is invalid
        """
        try:
            new_provider_instance = EmbeddingProviderFactory.create_provider(
                provider,
                config
            )
        except Exception as e:
            raise EmbeddingException(
                f"Failed to update config: {e}",
                provider="config_manager",
                details={"provider": provider, "config": config}
            ) from e

        previous_instance = self._provider
        self._provider_name = provider
        # Store a copy so the caller's dict is not aliased as internal state.
        self._config = dict(config)
        self._provider = new_provider_instance

        if previous_instance is not None:
            await self._close_quietly(previous_instance)

        logger.info(f"Updated embedding config: provider={provider}")
        return True

    async def test_connection(
        self,
        test_text: str = "这是一个测试文本",
        provider: str | None = None,
        config: dict[str, Any] | None = None,
    ) -> dict[str, Any]:
        """
        Test embedding connection.

        [AC-AISVC-41] Tests provider connectivity.

        A throwaway provider is created for the test and closed afterwards,
        whether or not the embedding call succeeded; the manager's active
        provider is not touched. Errors are reported in the returned dict
        rather than raised.

        Args:
            test_text: Text to embed for testing
            provider: Provider to test (uses current if None)
            config: Config to test (uses current if None or empty)

        Returns:
            Dict with test results including success, dimension, latency
        """
        import time

        test_provider_name = provider or self._provider_name
        test_config = config or self._config

        try:
            test_provider = EmbeddingProviderFactory.create_provider(
                test_provider_name,
                test_config
            )

            try:
                start_time = time.perf_counter()
                embedding = await test_provider.embed(test_text)
                latency_ms = (time.perf_counter() - start_time) * 1000
            except Exception:
                # Release the temporary provider without masking the
                # embedding error that is about to be reported.
                await self._close_quietly(test_provider)
                raise

            await test_provider.close()

            return {
                "success": True,
                "dimension": len(embedding),
                "latency_ms": latency_ms,
                "message": f"连接成功,向量维度: {len(embedding)}",
            }

        except Exception as e:
            return {
                "success": False,
                "dimension": 0,
                "latency_ms": 0,
                "error": str(e),
                "message": f"连接失败: {e}",
            }

    async def close(self) -> None:
        """Close the current provider, if any, and forget it."""
        # Drop the reference first so a failing close() cannot leave a
        # half-closed provider registered as the active one.
        current, self._provider = self._provider, None
        if current is not None:
            await current.close()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Process-wide manager instance; stays None until the first call to
# get_embedding_config_manager() creates it.
_embedding_config_manager: EmbeddingConfigManager | None = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_embedding_config_manager() -> EmbeddingConfigManager:
    """
    Return the shared embedding config manager, creating it on first use.

    [AC-AISVC-31] Singleton pattern for configuration management.

    On the first call the manager is built with the "nomic" provider and a
    configuration read from application settings (Ollama base URL, Ollama
    embedding model, Qdrant vector size). Later calls return that instance.
    """
    global _embedding_config_manager

    if _embedding_config_manager is not None:
        return _embedding_config_manager

    # Imported here rather than at module top, as in the original code.
    from app.core.config import get_settings

    app_settings = get_settings()
    initial_config = {
        "base_url": app_settings.ollama_base_url,
        "model": app_settings.ollama_embedding_model,
        "dimension": app_settings.qdrant_vector_size,
    }
    _embedding_config_manager = EmbeddingConfigManager(
        default_provider="nomic",
        default_config=initial_config,
    )
    return _embedding_config_manager
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def get_embedding_provider() -> EmbeddingProvider:
    """
    Return the embedding provider held by the shared config manager.

    [AC-AISVC-29] Convenience function for getting provider.
    """
    return await get_embedding_config_manager().get_provider()
|