From c8ff69efa5f0d9cb1cabb725452ae02173f9cad5 Mon Sep 17 00:00:00 2001
From: grillazz
Date: Sat, 3 May 2025 08:35:03 +0200
Subject: [PATCH] refactor llm service

---
 app/services/llm.py | 56 ++++++++++++++++++++-------------------------
 1 file changed, 25 insertions(+), 31 deletions(-)

diff --git a/app/services/llm.py b/app/services/llm.py
index 3d6ed34..eb8903c 100644
--- a/app/services/llm.py
+++ b/app/services/llm.py
@@ -10,42 +10,36 @@ class StreamLLMService:
 
     async def stream_chat(self, prompt: str) -> AsyncGenerator[bytes, None]:
         """Stream chat completion responses from LLM."""
-        # Send user message first
-        user_msg = {
-            'role': 'user',
-            'content': prompt,
-        }
-        yield orjson.dumps(user_msg) + b'\n'
+        # Send initial user message
+        yield orjson.dumps({"role": "user", "content": prompt}) + b"\n"
 
-        # Open client as context manager and stream responses
         async with httpx.AsyncClient(base_url=self.base_url) as client:
+            request_data = {
+                "model": self.model,
+                "messages": [{"role": "user", "content": prompt}],
+                "stream": True,
+            }
+
             async with client.stream(
-                "POST",
-                "/chat/completions",
-                json={
-                    "model": self.model,
-                    "messages": [{"role": "user", "content": prompt}],
-                    "stream": True
-                },
-                timeout=60.0
+                "POST", "/chat/completions", json=request_data, timeout=60.0
             ) as response:
                 async for line in response.aiter_lines():
-                    print(line)
-                    if line.startswith("data: ") and line != "data: [DONE]":
-                        try:
-                            json_line = line[6:]  # Remove "data: " prefix
-                            data = orjson.loads(json_line)
-                            content = data.get("choices", [{}])[0].get("delta", {}).get("content", "")
-                            if content:
-                                model_msg = {
-                                    'role': 'model',
-                                    'content': content
-                                }
-                                yield orjson.dumps(model_msg) + b'\n'
-                        except Exception:
-                            pass
+                    if not (line.startswith("data: ") and line != "data: [DONE]"):
+                        continue
+                    try:
+                        data = orjson.loads(line[6:])  # Skip "data: " prefix
+                        if (
+                            content := data.get("choices", [{}])[0]
+                            .get("delta", {})
+                            .get("content", "")
+                        ):
+                            yield (
+                                orjson.dumps({"role": "model", "content": content})
+                                + b"\n"
+                            )
+                    except Exception:
+                        pass
 
 
-# FastAPI dependency
 def get_llm_service(base_url: Optional[str] = None) -> StreamLLMService:
-    return StreamLLMService(base_url=base_url)
\ No newline at end of file
+    return StreamLLMService(base_url=base_url)
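
For reviewers, a minimal consumption sketch, assuming the service is exposed
through a FastAPI streaming endpoint. The /chat route, ChatRequest model, and
app wiring below are illustrative assumptions, not part of this patch; only
StreamLLMService, stream_chat, and get_llm_service come from app/services/llm.py.

    from fastapi import Depends, FastAPI
    from fastapi.responses import StreamingResponse
    from pydantic import BaseModel

    from app.services.llm import StreamLLMService, get_llm_service

    app = FastAPI()


    class ChatRequest(BaseModel):
        # Hypothetical request body; not defined in this patch.
        prompt: str


    @app.post("/chat")
    async def chat(
        payload: ChatRequest,
        llm: StreamLLMService = Depends(get_llm_service),
    ) -> StreamingResponse:
        # stream_chat yields one orjson-encoded message per line:
        # the {"role": "user", ...} echo first, then model chunks,
        # so application/x-ndjson is the natural media type.
        return StreamingResponse(
            llm.stream_chat(payload.prompt),
            media_type="application/x-ndjson",
        )

Because stream_chat appends b"\n" after every orjson.dumps call, clients can
parse each line of the response independently as it arrives.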