diff --git a/Makefile b/Makefile
index c55b42c..641ac0f 100644
--- a/Makefile
+++ b/Makefile
@@ -43,7 +43,7 @@ py-upgrade: ## Upgrade project py files with pyupgrade library for python versio
 
 .PHONY: lint
 lint: ## Lint project code.
-	poetry run ruff check --fix .
+	uv run ruff check --fix .
 
 .PHONY: slim-build
 slim-build: ## with power of docker-slim build smaller and safer images
diff --git a/app/main.py b/app/main.py
index ea69c93..6702847 100644
--- a/app/main.py
+++ b/app/main.py
@@ -7,11 +7,11 @@ from apscheduler.eventbrokers.redis import RedisEventBroker
 from fastapi import Depends, FastAPI
 
 from app.api.health import router as health_router
+from app.api.ml import router as ml_router
 from app.api.nonsense import router as nonsense_router
 from app.api.shakespeare import router as shakespeare_router
 from app.api.stuff import router as stuff_router
 from app.api.user import router as user_router
-from app.api.ml import router as ml_router
 from app.config import settings as global_settings
 from app.database import engine
 from app.redis import get_redis
diff --git a/app/services/llm.py b/app/services/llm.py
index 2b28ad7..b3b899a 100644
--- a/app/services/llm.py
+++ b/app/services/llm.py
@@ -1,6 +1,7 @@
+from collections.abc import AsyncGenerator
+
 import httpx
 import orjson
-from typing import AsyncGenerator, Optional
 
 
 class StreamLLMService:
@@ -8,7 +9,7 @@ class StreamLLMService:
         self.base_url = base_url
         self.model = "llama3.2"
 
-    async def stream_chat(self, prompt: str) -> AsyncGenerator[bytes, None]:
+    async def stream_chat(self, prompt: str) -> AsyncGenerator[bytes]:
         """Stream chat completion responses from LLM."""
         # Send the user a message first
         user_msg = {
@@ -47,5 +48,5 @@ class StreamLLMService:
 
 
 # FastAPI dependency
-def get_llm_service(base_url: Optional[str] = None) -> StreamLLMService:
+def get_llm_service(base_url: str | None = None) -> StreamLLMService:
     return StreamLLMService(base_url=base_url or "http://localhost:11434/v1")
diff --git a/tests/chat.py b/tests/chat.py
index a231c6c..b3fb325 100644
--- a/tests/chat.py
+++ b/tests/chat.py
@@ -2,6 +2,7 @@ import anyio
 import httpx
 import orjson
 
+
 async def chat_with_endpoint():
     async with httpx.AsyncClient() as client:
         while True:
@@ -14,9 +15,9 @@ async def chat_with_endpoint():
             print("\nModel: ", end="", flush=True)
             async with client.stream(
                 "POST",
-                "http://localhost:8000/chat/",
+                "http://0.0.0.0:8080/v1/ml/chat/",
                 data={"prompt": prompt},
-                timeout=60
+                timeout=60,
             ) as response:
                 async for chunk in response.aiter_lines():
                     if chunk:
@@ -26,5 +27,6 @@ async def chat_with_endpoint():
             except Exception as e:
                 print(f"\nError parsing chunk: {e}")
 
+
 if __name__ == "__main__":
     anyio.run(chat_with_endpoint)
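
Note: tests/chat.py now targets the new /v1/ml/chat/ route, but app/api/ml.py itself is not part of this diff. As a rough sketch only, the route it assumes could look like the following; the router prefix, form field, and media type below are assumptions, not the repository's actual implementation.

# Hypothetical sketch of app/api/ml.py (not shown in this diff).
from typing import Annotated

from fastapi import APIRouter, Depends, Form
from fastapi.responses import StreamingResponse

from app.services.llm import StreamLLMService, get_llm_service

# Prefix assumed here; it may instead be applied via include_router() in app/main.py.
router = APIRouter(prefix="/v1/ml", tags=["ml"])


@router.post("/chat/")
async def chat(
    prompt: Annotated[str, Form()],  # matches data={"prompt": prompt} sent by tests/chat.py
    llm: Annotated[StreamLLMService, Depends(get_llm_service)],
) -> StreamingResponse:
    # stream_chat() yields bytes chunks, so they can be passed straight to the response;
    # media type assumed, since the test client parses line-delimited JSON chunks.
    return StreamingResponse(llm.stream_chat(prompt), media_type="application/x-ndjson")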