mirror of https://github.com/grillazz/fastapi-sqlalchemy-asyncpg.git
synced 2025-08-26 16:40:40 +03:00

commit b5fcd0489a
parent 6f2db272c4

connect to local ollama
.env
@@ -2,14 +2,14 @@ PYTHONDONTWRITEBYTECODE=1
 PYTHONUNBUFFERED=1
 
 # Postgres
-POSTGRES_HOST=db
+POSTGRES_HOST=localhost
 POSTGRES_PORT=5432
 POSTGRES_DB=devdb
 POSTGRES_USER=devdb
 POSTGRES_PASSWORD=secret
 
 # Redis
-REDIS_HOST=inmemory
+REDIS_HOST=localhost
 REDIS_PORT=6379
 REDIS_DB=2
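The Postgres and Redis hosts move from the compose service names (db, inmemory) to localhost, matching the switch to host networking in the compose file below. A minimal sketch of turning these variables into connection URLs; the helper functions and the asyncpg driver prefix are illustrative assumptions, not the project's actual settings loader:

    import os

    def postgres_dsn() -> str:
        # Hypothetical helper: build an async SQLAlchemy DSN from the .env values above.
        return (
            f"postgresql+asyncpg://{os.environ['POSTGRES_USER']}:{os.environ['POSTGRES_PASSWORD']}"
            f"@{os.environ['POSTGRES_HOST']}:{os.environ['POSTGRES_PORT']}/{os.environ['POSTGRES_DB']}"
        )

    def redis_url() -> str:
        # Hypothetical helper: build a Redis URL from the .env values above.
        return f"redis://{os.environ['REDIS_HOST']}:{os.environ['REDIS_PORT']}/{os.environ['REDIS_DB']}"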
@@ -10,36 +10,42 @@ class StreamLLMService:
 
     async def stream_chat(self, prompt: str) -> AsyncGenerator[bytes, None]:
         """Stream chat completion responses from LLM."""
-        # Send initial user message
-        yield orjson.dumps({"role": "user", "content": prompt}) + b"\n"
+        # Send user message first
+        user_msg = {
+            "role": "user",
+            "content": prompt,
+        }
+        yield orjson.dumps(user_msg) + b"\n"
+
         # Open client as context manager and stream responses
         async with httpx.AsyncClient(base_url=self.base_url) as client:
-            request_data = {
-                "model": self.model,
-                "messages": [{"role": "user", "content": prompt}],
-                "stream": True,
-            }
-
             async with client.stream(
-                "POST", "/chat/completions", json=request_data, timeout=60.0
+                "POST",
+                "/chat/completions",
+                json={
+                    "model": self.model,
+                    "messages": [{"role": "user", "content": prompt}],
+                    "stream": True,
+                },
+                timeout=60.0,
             ) as response:
                 async for line in response.aiter_lines():
-                    if not (line.startswith("data: ") and line != "data: [DONE]"):
-                        continue
-                    try:
-                        data = orjson.loads(line[6:])  # Skip "data: " prefix
-                        if (
-                            content := data.get("choices", [{}])[0]
-                            .get("delta", {})
-                            .get("content", "")
-                        ):
-                            yield (
-                                orjson.dumps({"role": "model", "content": content})
-                                + b"\n"
-                            )
-                    except Exception:
-                        pass
+                    if line.startswith("data: ") and line != "data: [DONE]":
+                        try:
+                            json_line = line[6:]  # Remove "data: " prefix
+                            data = orjson.loads(json_line)
+                            content = (
+                                data.get("choices", [{}])[0]
+                                .get("delta", {})
+                                .get("content", "")
+                            )
+                            if content:
+                                model_msg = {"role": "model", "content": content}
+                                yield orjson.dumps(model_msg) + b"\n"
+                        except Exception:
+                            pass
 
 
 # FastAPI dependency
 def get_llm_service(base_url: Optional[str] = None) -> StreamLLMService:
-    return StreamLLMService(base_url=base_url)
+    return StreamLLMService(base_url=base_url or "http://localhost:11434/v1")
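stream_chat now yields newline-delimited JSON: the user message first, then one {"role": "model", "content": ...} object per token parsed from Ollama's OpenAI-compatible SSE stream, and get_llm_service defaults to the local Ollama endpoint. A sketch of how the generator could be exposed from FastAPI; the /chat/ path and the prompt form field mirror the old test client further down, and the import path is an assumption:

    from fastapi import Depends, FastAPI, Form
    from fastapi.responses import StreamingResponse

    # Assumed import path; the diff does not show the module's filename.
    from app.services.llm import StreamLLMService, get_llm_service

    app = FastAPI()

    @app.post("/chat/")
    async def chat(
        prompt: str = Form(...),
        llm: StreamLLMService = Depends(get_llm_service),
    ) -> StreamingResponse:
        # Each yielded chunk is a complete JSON object terminated by "\n".
        return StreamingResponse(llm.stream_chat(prompt), media_type="application/x-ndjson")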
@@ -1,6 +1,7 @@
 services:
   app:
     container_name: fsap_app
+    network_mode: host
     build: .
     env_file:
       - .env
@@ -22,6 +23,7 @@ services:
 
   db:
     container_name: fsap_db
+    network_mode: host
     build:
       context: ./db
       dockerfile: Dockerfile
@@ -46,6 +48,7 @@ services:
 
   inmemory:
     image: redis:latest
+    network_mode: host
     container_name: fsap_inmemory
     ports:
       - "6379:6379"
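With network_mode: host each container shares the host's network namespace, so the compose service-name hostnames (and, on Linux, the ports: mappings) no longer apply; that is why .env above now points at localhost and why the app container can reach an Ollama server running directly on the host. A quick connectivity check, assuming a local Ollama instance exposing the OpenAI-compatible API on the default port 11434:

    import httpx

    def check_ollama(base_url: str = "http://localhost:11434/v1") -> None:
        # List models via Ollama's OpenAI-compatible endpoint (assumes Ollama is running locally).
        resp = httpx.get(f"{base_url}/models", timeout=5.0)
        resp.raise_for_status()
        print("available models:", [m.get("id") for m in resp.json().get("data", [])])

    if __name__ == "__main__":
        check_ollama()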
@@ -1,33 +1,53 @@
-import anyio
+from typing import Optional, AsyncGenerator
 
 import httpx
 import orjson
 
-API_URL = "http://localhost:8000/chat/"
-
-
-async def chat_with_endpoint():
-    async with httpx.AsyncClient() as client:
-        while True:
-            prompt = input("\nYou: ")
-            if prompt.lower() == "exit":
-                break
-
-            print("\nModel: ", end="", flush=True)
-            try:
-                async with client.stream(
-                    "POST", API_URL, data={"prompt": prompt}, timeout=60
-                ) as response:
-                    async for chunk in response.aiter_lines():
-                        if not chunk:
-                            continue
-                        try:
-                            print(orjson.loads(chunk)["content"], end="", flush=True)
-                        except Exception as e:
-                            print(f"\nError parsing chunk: {e}")
-            except httpx.RequestError as e:
-                print(f"\nConnection error: {e}")
-
-
-if __name__ == "__main__":
-    anyio.run(chat_with_endpoint)
+
+class StreamLLMService:
+    def __init__(self, base_url: str = "http://localhost:11434/v1"):
+        self.base_url = base_url
+        self.model = "llama3.2"
+
+    async def stream_chat(self, prompt: str) -> AsyncGenerator[bytes, None]:
+        """Stream chat completion responses from LLM."""
+        # Send user message first
+        user_msg = {
+            "role": "user",
+            "content": prompt,
+        }
+        yield orjson.dumps(user_msg) + b"\n"
+
+        # Open client as context manager and stream responses
+        async with httpx.AsyncClient(base_url=self.base_url) as client:
+            async with client.stream(
+                "POST",
+                "/chat/completions",
+                json={
+                    "model": self.model,
+                    "messages": [{"role": "user", "content": prompt}],
+                    "stream": True,
+                },
+                timeout=60.0,
+            ) as response:
+                async for line in response.aiter_lines():
+                    print(line)
+                    if line.startswith("data: ") and line != "data: [DONE]":
+                        try:
+                            json_line = line[6:]  # Remove "data: " prefix
+                            data = orjson.loads(json_line)
+                            content = (
+                                data.get("choices", [{}])[0]
+                                .get("delta", {})
+                                .get("content", "")
+                            )
+                            if content:
+                                model_msg = {"role": "model", "content": content}
+                                yield orjson.dumps(model_msg) + b"\n"
+                        except Exception:
+                            pass
+
+
+# FastAPI dependency
+def get_llm_service(base_url: Optional[str] = None) -> StreamLLMService:
+    return StreamLLMService(base_url=base_url or "http://localhost:11434/v1")
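The interactive anyio client that posted prompts to http://localhost:8000/chat/ is replaced by the StreamLLMService module itself, so it can be driven against Ollama directly. A small sketch of exercising it without the FastAPI layer; it assumes Ollama is serving the llama3.2 model locally and that the module is importable (its filename is not shown in the diff):

    import anyio
    import orjson

    from llm import StreamLLMService  # assumed module name

    async def main() -> None:
        service = StreamLLMService()
        # stream_chat yields NDJSON: the user message first, then one object per model token.
        async for raw in service.stream_chat("Say hello in one sentence."):
            msg = orjson.loads(raw)
            print(f"{msg['role']}: {msg['content']}", flush=True)

    if __name__ == "__main__":
        anyio.run(main)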