"""
Nomic embedding provider with task prefixes and Matryoshka support.

Implements RAG optimization spec:
- Task prefixes: search_document: / search_query:
- Matryoshka dimension truncation: 256/512/768 dimensions
"""

import logging
import time
from dataclasses import dataclass, field
from enum import Enum
from typing import Any

import httpx
import numpy as np

from app.services.embedding.base import (
    EmbeddingConfig,
    EmbeddingException,
    EmbeddingProvider,
)

logger = logging.getLogger(__name__)


class EmbeddingTask(str, Enum):
    """Task type for nomic-embed-text v1.5 model."""

    DOCUMENT = "search_document"
    QUERY = "search_query"


@dataclass
class NomicEmbeddingResult:
    """Result from Nomic embedding with multiple dimensions."""

    # Full-dimension (typically 768) normalized embedding.
    embedding_full: list[float]
    # Matryoshka-truncated, re-normalized 256-d variant.
    embedding_256: list[float]
    # Matryoshka-truncated, re-normalized 512-d variant.
    embedding_512: list[float]
    # Dimension of the raw embedding returned by the model.
    dimension: int
    # Model name that produced the embedding.
    model: str
    # Task prefix used (DOCUMENT for indexing, QUERY for retrieval).
    task: EmbeddingTask
    # Wall-clock latency of the API call in milliseconds.
    latency_ms: float = 0.0
    metadata: dict[str, Any] = field(default_factory=dict)


class NomicEmbeddingProvider(EmbeddingProvider):
    """
    Nomic-embed-text v1.5 embedding provider with task prefixes.

    Key features:
    - Task prefixes: search_document: for documents, search_query: for queries
    - Matryoshka dimension truncation: 256/512/768 dimensions
    - Automatic normalization after truncation

    Reference: rag-optimization/spec.md Section 2.1, 2.3
    """

    PROVIDER_NAME = "nomic"
    DOCUMENT_PREFIX = "search_document:"
    QUERY_PREFIX = "search_query:"
    FULL_DIMENSION = 768

    def __init__(
        self,
        base_url: str = "http://localhost:11434",
        model: str = "nomic-embed-text",
        dimension: int = 768,
        timeout_seconds: int = 60,
        enable_matryoshka: bool = True,
        **kwargs: Any,
    ):
        """
        Args:
            base_url: Ollama server base URL (trailing slash is stripped).
            model: Embedding model name served by Ollama.
            dimension: Target dimension for `embed()` / `get_dimension()`
                (256, 512, or 768 when Matryoshka truncation is enabled).
            timeout_seconds: HTTP request timeout.
            enable_matryoshka: Whether `embed()` may return a truncated
                (256/512) variant instead of the full-dimension vector.
            **kwargs: Extra provider-specific config, stored as-is.
        """
        self._base_url = base_url.rstrip("/")
        self._model = model
        self._dimension = dimension
        self._timeout = timeout_seconds
        self._enable_matryoshka = enable_matryoshka
        # Lazily created so the provider can be constructed outside an
        # event loop; see _get_client().
        self._client: httpx.AsyncClient | None = None
        self._extra_config = kwargs

    async def _get_client(self) -> httpx.AsyncClient:
        """Return the shared AsyncClient, creating it on first use."""
        if self._client is None:
            self._client = httpx.AsyncClient(timeout=self._timeout)
        return self._client

    def _add_prefix(self, text: str, task: EmbeddingTask) -> str:
        """
        Prepend the task prefix required by nomic-embed-text v1.5.

        The model card specifies the form "search_document: <text>" /
        "search_query: <text>" — note the space after the colon. Text that
        already carries the prefix is returned unchanged (idempotent).
        """
        if task == EmbeddingTask.DOCUMENT:
            prefix = self.DOCUMENT_PREFIX
        else:
            prefix = self.QUERY_PREFIX
        if text.startswith(prefix):
            return text
        # FIX: original emitted "<prefix><text>" without the separating
        # space, deviating from the nomic-embed-text v1.5 prompt format.
        return f"{prefix} {text}"

    def _truncate_and_normalize(self, embedding: list[float], target_dim: int) -> list[float]:
        """
        Truncate embedding to target dimension and L2-normalize.

        Matryoshka representation learning allows dimension truncation;
        re-normalization is required after dropping trailing components.
        A zero vector is returned unchanged (norm guard avoids div-by-zero).
        """
        truncated = embedding[:target_dim]
        arr = np.array(truncated, dtype=np.float32)
        norm = np.linalg.norm(arr)
        if norm > 0:
            arr = arr / norm
        return arr.tolist()

    def _select_embedding(self, result: NomicEmbeddingResult) -> list[float]:
        """
        Pick the embedding variant matching the configured dimension.

        Falls back to the full-dimension vector when Matryoshka truncation
        is disabled or the configured dimension is not 256/512.
        """
        if self._enable_matryoshka:
            if self._dimension == 256:
                return result.embedding_256
            if self._dimension == 512:
                return result.embedding_512
        return result.embedding_full

    async def embed_with_task(
        self,
        text: str,
        task: EmbeddingTask,
    ) -> NomicEmbeddingResult:
        """
        Generate embedding with specified task prefix.

        Args:
            text: Input text to embed
            task: DOCUMENT for indexing, QUERY for retrieval

        Returns:
            NomicEmbeddingResult with all dimension variants

        Raises:
            EmbeddingException: on HTTP errors, connection failures,
                empty embeddings, or any unexpected failure (original
                exception attached via ``__cause__``).
        """
        start_time = time.perf_counter()
        prefixed_text = self._add_prefix(text, task)

        try:
            client = await self._get_client()
            response = await client.post(
                f"{self._base_url}/api/embeddings",
                json={
                    "model": self._model,
                    "prompt": prefixed_text,
                },
            )
            response.raise_for_status()

            data = response.json()
            embedding = data.get("embedding", [])

            if not embedding:
                raise EmbeddingException(
                    "Empty embedding returned",
                    provider=self.PROVIDER_NAME,
                    details={"text_length": len(text), "task": task.value},
                )

            latency_ms = (time.perf_counter() - start_time) * 1000

            embedding_256 = self._truncate_and_normalize(embedding, 256)
            embedding_512 = self._truncate_and_normalize(embedding, 512)
            # Full vector is also re-normalized for a consistent contract
            # across all three variants.
            embedding_full = self._truncate_and_normalize(embedding, len(embedding))

            logger.debug(
                f"Generated Nomic embedding: task={task.value}, "
                f"dim={len(embedding)}, latency={latency_ms:.2f}ms"
            )

            return NomicEmbeddingResult(
                embedding_full=embedding_full,
                embedding_256=embedding_256,
                embedding_512=embedding_512,
                dimension=len(embedding),
                model=self._model,
                task=task,
                latency_ms=latency_ms,
            )

        except httpx.HTTPStatusError as e:
            # FIX: chain the cause so the original traceback survives.
            raise EmbeddingException(
                f"Ollama API error: {e.response.status_code}",
                provider=self.PROVIDER_NAME,
                details={"status_code": e.response.status_code, "response": e.response.text},
            ) from e
        except httpx.RequestError as e:
            raise EmbeddingException(
                f"Ollama connection error: {e}",
                provider=self.PROVIDER_NAME,
                details={"base_url": self._base_url},
            ) from e
        except EmbeddingException:
            # Already our exception type (e.g. empty-embedding case above).
            raise
        except Exception as e:
            raise EmbeddingException(
                f"Embedding generation failed: {e}",
                provider=self.PROVIDER_NAME,
            ) from e

    async def embed_document(self, text: str) -> NomicEmbeddingResult:
        """
        Generate embedding for document (with search_document: prefix).

        Use this when indexing documents into vector store.
        """
        return await self.embed_with_task(text, EmbeddingTask.DOCUMENT)

    async def embed_query(self, text: str) -> NomicEmbeddingResult:
        """
        Generate embedding for query (with search_query: prefix).

        Use this when searching/retrieving documents.
        """
        return await self.embed_with_task(text, EmbeddingTask.QUERY)

    async def embed(self, text: str) -> list[float]:
        """
        Generate embedding vector for a single text.

        Default uses QUERY task for backward compatibility.

        FIX: the returned vector now honors the configured ``dimension``
        (256/512 truncated variant when Matryoshka is enabled) so it is
        consistent with ``get_dimension()``; previously the full vector
        was always returned regardless of configuration.
        """
        result = await self.embed_query(text)
        return self._select_embedding(result)

    async def embed_batch(self, texts: list[str]) -> list[list[float]]:
        """
        Generate embedding vectors for multiple texts.

        Uses QUERY task by default. Requests are sequential, preserving
        input order.
        """
        embeddings = []
        for text in texts:
            embedding = await self.embed(text)
            embeddings.append(embedding)
        return embeddings

    async def embed_documents_batch(
        self,
        texts: list[str],
    ) -> list[NomicEmbeddingResult]:
        """
        Generate embeddings for multiple documents (DOCUMENT task).

        Use this when batch indexing documents.
        """
        results = []
        for text in texts:
            result = await self.embed_document(text)
            results.append(result)
        return results

    async def embed_queries_batch(
        self,
        texts: list[str],
    ) -> list[NomicEmbeddingResult]:
        """
        Generate embeddings for multiple queries (QUERY task).

        Use this when batch processing queries.
        """
        results = []
        for text in texts:
            result = await self.embed_query(text)
            results.append(result)
        return results

    def get_dimension(self) -> int:
        """Get the dimension of embedding vectors."""
        return self._dimension

    def get_provider_name(self) -> str:
        """Get the name of this embedding provider."""
        return self.PROVIDER_NAME

    def get_config_schema(self) -> dict[str, Any]:
        """Get the configuration schema for Nomic provider."""
        # NOTE: titles/descriptions below are user-facing runtime strings
        # and are intentionally left untouched.
        return {
            "base_url": {
                "type": "string",
                "title": "API 地址",
                "description": "Ollama API 地址",
                "default": "http://localhost:11434",
            },
            "model": {
                "type": "string",
                "title": "模型名称",
                "description": "嵌入模型名称(推荐 nomic-embed-text v1.5)",
                "default": "nomic-embed-text",
            },
            "dimension": {
                "type": "integer",
                "title": "向量维度",
                "description": "向量维度(支持 256/512/768)",
                "default": 768,
            },
            "timeout_seconds": {
                "type": "integer",
                "title": "超时时间",
                "description": "请求超时时间(秒)",
                "default": 60,
            },
            "enable_matryoshka": {
                "type": "boolean",
                "title": "Matryoshka 截断",
                "description": "启用 Matryoshka 维度截断",
                "default": True,
            },
        }

    async def close(self) -> None:
        """Close the HTTP client and drop the cached instance."""
        if self._client:
            await self._client.aclose()
            self._client = None