""" Base LLM client interface. [AC-AISVC-02, AC-AISVC-06] Abstract interface for LLM providers. Design reference: design.md Section 8.1 - LLMClient interface - generate(prompt, params) -> text - stream_generate(prompt, params) -> iterator[delta] """ from abc import ABC, abstractmethod from collections.abc import AsyncGenerator from dataclasses import dataclass, field from typing import Any @dataclass class LLMConfig: """ Configuration for LLM client. [AC-AISVC-02] Supports configurable model parameters. """ model: str = "gpt-4o-mini" max_tokens: int = 2048 temperature: float = 0.7 top_p: float = 1.0 timeout_seconds: int = 30 max_retries: int = 3 extra_params: dict[str, Any] = field(default_factory=dict) @dataclass class LLMResponse: """ Response from LLM generation. [AC-AISVC-02] Contains generated content and metadata. """ content: str model: str usage: dict[str, int] = field(default_factory=dict) finish_reason: str = "stop" metadata: dict[str, Any] = field(default_factory=dict) @dataclass class LLMStreamChunk: """ Streaming chunk from LLM. [AC-AISVC-06, AC-AISVC-07] Incremental output for SSE streaming. """ delta: str model: str finish_reason: str | None = None metadata: dict[str, Any] = field(default_factory=dict) class LLMClient(ABC): """ Abstract base class for LLM clients. [AC-AISVC-02, AC-AISVC-06] Provides unified interface for different LLM providers. Design reference: design.md Section 8.2 - Plugin points - OpenAICompatibleClient / LocalModelClient can be swapped """ @abstractmethod async def generate( self, messages: list[dict[str, str]], config: LLMConfig | None = None, **kwargs: Any, ) -> LLMResponse: """ Generate a non-streaming response. [AC-AISVC-02] Returns complete response for ChatResponse. Args: messages: List of chat messages with 'role' and 'content'. config: Optional LLM configuration overrides. **kwargs: Additional provider-specific parameters. Returns: LLMResponse with generated content and metadata. Raises: LLMException: If generation fails. """ pass @abstractmethod async def stream_generate( self, messages: list[dict[str, str]], config: LLMConfig | None = None, **kwargs: Any, ) -> AsyncGenerator[LLMStreamChunk, None]: """ Generate a streaming response. [AC-AISVC-06, AC-AISVC-07] Yields incremental chunks for SSE. Args: messages: List of chat messages with 'role' and 'content'. config: Optional LLM configuration overrides. **kwargs: Additional provider-specific parameters. Yields: LLMStreamChunk with incremental content. Raises: LLMException: If generation fails. """ pass @abstractmethod async def close(self) -> None: """Close the client and release resources.""" pass