diff --git a/ai-service/app/services/orchestrator.py b/ai-service/app/services/orchestrator.py index 42d16dc..9b3d418 100644 --- a/ai-service/app/services/orchestrator.py +++ b/ai-service/app/services/orchestrator.py @@ -119,13 +119,7 @@ class OrchestratorService: max_evidence_tokens=getattr(settings, "rag_max_evidence_tokens", 2000), enable_rag=True, ) - self._llm_config = LLMConfig( - model=getattr(settings, "llm_model", "gpt-4o-mini"), - max_tokens=getattr(settings, "llm_max_tokens", 2048), - temperature=getattr(settings, "llm_temperature", 0.7), - timeout_seconds=getattr(settings, "llm_timeout_seconds", 30), - max_retries=getattr(settings, "llm_max_retries", 3), - ) + self._llm_config: LLMConfig | None = None async def generate( self, @@ -345,7 +339,6 @@ class OrchestratorService: try: ctx.llm_response = await self._llm_client.generate( messages=messages, - config=self._llm_config, ) ctx.diagnostics["llm_mode"] = "live" ctx.diagnostics["llm_model"] = ctx.llm_response.model @@ -627,7 +620,7 @@ class OrchestratorService: """ messages = self._build_llm_messages(ctx) - async for chunk in self._llm_client.stream_generate(messages, self._llm_config): + async for chunk in self._llm_client.stream_generate(messages): if not state_machine.can_send_message(): break