refactor llm service

2025-11-30 13:20:40 +03:00 · 2025-05-03 08:35:03 +02:00
parent f261fb3e13
commit c8ff69efa5
1 changed files with 25 additions and 31 deletions
--- a/app/services/llm.py
+++ b/app/services/llm.py
@@ -10,42 +10,36 @@ class StreamLLMService:
    async def stream_chat(self, prompt: str) -> AsyncGenerator[bytes, None]:
        """Stream chat completion responses from LLM."""
-        # Send user message first
+        # Send initial user message
-        user_msg = {
+        yield orjson.dumps({"role": "user", "content": prompt}) + b"\n"
-            'role': 'user',
-            'content': prompt,
-        }
-        yield orjson.dumps(user_msg) + b'\n'
        # Open client as context manager and stream responses
        async with httpx.AsyncClient(base_url=self.base_url) as client:
+            request_data = {
+                "model": self.model,
+                "messages": [{"role": "user", "content": prompt}],
+                "stream": True,
+            }
            async with client.stream(
-                    "POST",
+                "POST", "/chat/completions", json=request_data, timeout=60.0
-                    "/chat/completions",
-                    json={
-                        "model": self.model,
-                        "messages": [{"role": "user", "content": prompt}],
-                        "stream": True
-                    },
-                    timeout=60.0
            ) as response:
                async for line in response.aiter_lines():
-                    print(line)
+                    if not (line.startswith("data: ") and line != "data: [DONE]"):
-                    if line.startswith("data: ") and line != "data: [DONE]":
+                        continue
-                        try:
+                    try:
-                            json_line = line[6:]  # Remove "data: " prefix
+                        data = orjson.loads(line[6:])  # Skip "data: " prefix
-                            data = orjson.loads(json_line)
+                        if (
-                            content = data.get("choices", [{}])[0].get("delta", {}).get("content", "")
+                            content := data.get("choices", [{}])[0]
-                            if content:
+                            .get("delta", {})
-                                model_msg = {
+                            .get("content", "")
-                                    'role': 'model',
+                        ):
-                                    'content': content
+                            yield (
-                                }
+                                orjson.dumps({"role": "model", "content": content})
-                                yield orjson.dumps(model_msg) + b'\n'
+                                + b"\n"
-                        except Exception:
+                            )
-                            pass
+                    except Exception:
+                        pass
 # FastAPI dependency
 def get_llm_service(base_url: Optional[str] = None) -> StreamLLMService:
-    return StreamLLMService(base_url=base_url)
+    return StreamLLMService(base_url=base_url)