connect to local ollama

grillazz 2025-05-03 09:41:39 +02:00
parent 6f2db272c4
commit b5fcd0489a
4 changed files with 79 additions and 50 deletions

.env

@@ -2,14 +2,14 @@ PYTHONDONTWRITEBYTECODE=1
 PYTHONUNBUFFERED=1
 # Postgres
-POSTGRES_HOST=db
+POSTGRES_HOST=localhost
 POSTGRES_PORT=5432
 POSTGRES_DB=devdb
 POSTGRES_USER=devdb
 POSTGRES_PASSWORD=secret
 # Redis
-REDIS_HOST=inmemory
+REDIS_HOST=localhost
 REDIS_PORT=6379
 REDIS_DB=2
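
With all three containers switched to host networking in the compose file below, the compose service names db and inmemory no longer resolve, so the env file now points both hosts at localhost. As a rough illustration of how these variables end up being used (not the project's actual settings code, and the postgresql+asyncpg scheme is an assumption), the connection URLs could be assembled like this:

import os


# Illustrative only: builds DSNs from the variables above; the repo's real
# settings module may construct these differently.
def dsns_from_env() -> tuple[str, str]:
    pg_dsn = (
        f"postgresql+asyncpg://{os.getenv('POSTGRES_USER', 'devdb')}:"
        f"{os.getenv('POSTGRES_PASSWORD', 'secret')}"
        f"@{os.getenv('POSTGRES_HOST', 'localhost')}:{os.getenv('POSTGRES_PORT', '5432')}"
        f"/{os.getenv('POSTGRES_DB', 'devdb')}"
    )
    redis_dsn = (
        f"redis://{os.getenv('REDIS_HOST', 'localhost')}:"
        f"{os.getenv('REDIS_PORT', '6379')}/{os.getenv('REDIS_DB', '2')}"
    )
    return pg_dsn, redis_dsn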


@@ -10,36 +10,42 @@ class StreamLLMService:
     async def stream_chat(self, prompt: str) -> AsyncGenerator[bytes, None]:
         """Stream chat completion responses from LLM."""
-        # Send initial user message
-        yield orjson.dumps({"role": "user", "content": prompt}) + b"\n"
+        # Send user message first
+        user_msg = {
+            "role": "user",
+            "content": prompt,
+        }
+        yield orjson.dumps(user_msg) + b"\n"
 
+        # Open client as context manager and stream responses
         async with httpx.AsyncClient(base_url=self.base_url) as client:
-            request_data = {
-                "model": self.model,
-                "messages": [{"role": "user", "content": prompt}],
-                "stream": True,
-            }
             async with client.stream(
-                "POST", "/chat/completions", json=request_data, timeout=60.0
+                "POST",
+                "/chat/completions",
+                json={
+                    "model": self.model,
+                    "messages": [{"role": "user", "content": prompt}],
+                    "stream": True,
+                },
+                timeout=60.0,
             ) as response:
                 async for line in response.aiter_lines():
-                    if not (line.startswith("data: ") and line != "data: [DONE]"):
-                        continue
-                    try:
-                        data = orjson.loads(line[6:])  # Skip "data: " prefix
-                        if (
-                            content := data.get("choices", [{}])[0]
-                            .get("delta", {})
-                            .get("content", "")
-                        ):
-                            yield (
-                                orjson.dumps({"role": "model", "content": content})
-                                + b"\n"
-                            )
-                    except Exception:
-                        pass
+                    if line.startswith("data: ") and line != "data: [DONE]":
+                        try:
+                            json_line = line[6:]  # Remove "data: " prefix
+                            data = orjson.loads(json_line)
+                            content = (
+                                data.get("choices", [{}])[0]
+                                .get("delta", {})
+                                .get("content", "")
+                            )
+                            if content:
+                                model_msg = {"role": "model", "content": content}
+                                yield orjson.dumps(model_msg) + b"\n"
+                        except Exception:
+                            pass
 
 
+# FastAPI dependency
 def get_llm_service(base_url: Optional[str] = None) -> StreamLLMService:
-    return StreamLLMService(base_url=base_url)
+    return StreamLLMService(base_url=base_url or "http://localhost:11434/v1")
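
stream_chat yields newline-delimited JSON, one object per line, which maps directly onto a FastAPI StreamingResponse. A sketch of how get_llm_service might be wired into a route follows; the import path, route path, and prompt handling are assumptions for illustration (the removed client further down posted the prompt as form data to /chat/, so the real route likely differs):

# Hypothetical wiring of the service into a FastAPI endpoint.
from fastapi import Depends, FastAPI
from fastapi.responses import StreamingResponse

from app.services.llm import StreamLLMService, get_llm_service  # module path is a guess

app = FastAPI()


@app.post("/chat/")  # route path assumed from the removed test client
async def chat(prompt: str, llm: StreamLLMService = Depends(get_llm_service)) -> StreamingResponse:
    # Each yielded chunk is one JSON object followed by "\n" (NDJSON).
    return StreamingResponse(llm.stream_chat(prompt), media_type="application/x-ndjson")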


@@ -1,6 +1,7 @@
 services:
   app:
     container_name: fsap_app
+    network_mode: host
     build: .
     env_file:
       - .env
@@ -22,6 +23,7 @@ services:
   db:
     container_name: fsap_db
+    network_mode: host
     build:
       context: ./db
       dockerfile: Dockerfile
@@ -46,6 +48,7 @@ services:
   inmemory:
     image: redis:latest
+    network_mode: host
     container_name: fsap_inmemory
     ports:
       - "6379:6379"


@@ -1,33 +1,53 @@
-import anyio
+from typing import Optional, AsyncGenerator
+
 import httpx
 import orjson
 
-API_URL = "http://localhost:8000/chat/"
-
-
-async def chat_with_endpoint():
-    async with httpx.AsyncClient() as client:
-        while True:
-            prompt = input("\nYou: ")
-            if prompt.lower() == "exit":
-                break
-
-            print("\nModel: ", end="", flush=True)
-            try:
-                async with client.stream(
-                    "POST", API_URL, data={"prompt": prompt}, timeout=60
-                ) as response:
-                    async for chunk in response.aiter_lines():
-                        if not chunk:
-                            continue
-                        try:
-                            print(orjson.loads(chunk)["content"], end="", flush=True)
-                        except Exception as e:
-                            print(f"\nError parsing chunk: {e}")
-            except httpx.RequestError as e:
-                print(f"\nConnection error: {e}")
-
-
-if __name__ == "__main__":
-    anyio.run(chat_with_endpoint)
+
+class StreamLLMService:
+    def __init__(self, base_url: str = "http://localhost:11434/v1"):
+        self.base_url = base_url
+        self.model = "llama3.2"
+
+    async def stream_chat(self, prompt: str) -> AsyncGenerator[bytes, None]:
+        """Stream chat completion responses from LLM."""
+        # Send user message first
+        user_msg = {
+            "role": "user",
+            "content": prompt,
+        }
+        yield orjson.dumps(user_msg) + b"\n"
+
+        # Open client as context manager and stream responses
+        async with httpx.AsyncClient(base_url=self.base_url) as client:
+            async with client.stream(
+                "POST",
+                "/chat/completions",
+                json={
+                    "model": self.model,
+                    "messages": [{"role": "user", "content": prompt}],
+                    "stream": True,
+                },
+                timeout=60.0,
+            ) as response:
+                async for line in response.aiter_lines():
+                    print(line)
+                    if line.startswith("data: ") and line != "data: [DONE]":
+                        try:
+                            json_line = line[6:]  # Remove "data: " prefix
+                            data = orjson.loads(json_line)
+                            content = (
+                                data.get("choices", [{}])[0]
+                                .get("delta", {})
+                                .get("content", "")
+                            )
+                            if content:
+                                model_msg = {"role": "model", "content": content}
+                                yield orjson.dumps(model_msg) + b"\n"
+                        except Exception:
+                            pass
+
+
+# FastAPI dependency
+def get_llm_service(base_url: Optional[str] = None) -> StreamLLMService:
+    return StreamLLMService(base_url=base_url or "http://localhost:11434/v1")