From e2158768480658073dd99daa967449d9ac6a9194 Mon Sep 17 00:00:00 2001
From: grillazz
Date: Sat, 3 May 2025 13:18:27 +0200
Subject: [PATCH] add README.md and test

---
 README.md           | 20 ++++++++++++++
 app/services/llm.py |  2 +-
 tests/chat.py       | 65 +++++++++++++++------------------------------
 3 files changed, 42 insertions(+), 45 deletions(-)

diff --git a/README.md b/README.md
index 8533f45..ed4bb98 100644
--- a/README.md
+++ b/README.md
@@ -31,6 +31,7 @@
   • Schedule jobs
   • Email Configuration
   • UV knowledge and inspirations
+  • Integration with local LLM
   • Acknowledgments
@@ -162,6 +163,24 @@ This service supports plaintext and HTML emails, and also allows sending templat
 It is implemented as a singleton to ensure that only one SMTP connection is maintained throughout the application lifecycle, optimizing resource usage.
+
 
 (back to top)
 
+
+### Large Language Model
+The `/v1/ml/chat/` endpoint handles chat-based interactions with the LLM.
+It accepts a user prompt and streams the model's response back in real time.
+The endpoint leverages FastAPI's asynchronous capabilities to serve many simultaneous requests
+while keeping latency low and throughput high.
+
+FastAPI's async support is particularly useful for reducing I/O bottlenecks when talking to the LLM server.
+With an asynchronous HTTP client such as `httpx`,
+the application can handle multiple I/O-bound tasks concurrently,
+such as forwarding prompts to the LLM server while streaming responses back to the client.
+This minimizes idle time and makes better use of resources under load.
+
+Install Ollama and start the model:
+```shell
+ollama run llama3.2
+```
 
 (back to top)
 
@@ -215,6 +234,7 @@ I've included a few of my favorites to kick things off!
 - **[DEC 16 2024]** bump project to Python 3.13 :fast_forward:
 - **[JAN 28 2025]** add SMTP setup :email:
 - **[MAR 8 2025]** switch from poetry to uv :fast_forward:
+- **[MAY 3 2025]** add large language model integration :robot:
 
 (back to top)
 
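The README hunk above documents the streaming endpoint, but the FastAPI route that exposes it is not part of this patch. The sketch below shows one plausible wiring, assuming a router mounted under `/v1/ml`, a form-encoded `prompt` field (matching `data={"prompt": ...}` in `tests/chat.py`), and the `StreamLLMService` / `get_llm_service` pair from `app/services/llm.py`; note that the README advertises `/v1/ml/chat/` while `tests/chat.py` posts to `http://localhost:8000/chat/`, so the prefix should be adjusted to the project's actual routing.

```python
# Hypothetical route wiring -- illustrative sketch, not part of this patch.
from typing import Annotated

from fastapi import APIRouter, Depends, Form
from fastapi.responses import StreamingResponse

from app.services.llm import StreamLLMService, get_llm_service  # assumed import path

router = APIRouter(prefix="/v1/ml", tags=["ml"])


@router.post("/chat/")
async def chat(
    prompt: Annotated[str, Form()],
    llm: Annotated[StreamLLMService, Depends(get_llm_service)],
) -> StreamingResponse:
    # stream_chat yields newline-delimited JSON chunks (orjson-encoded),
    # so the client can parse each line as it arrives.
    return StreamingResponse(llm.stream_chat(prompt), media_type="application/x-ndjson")
```

Streaming the generator straight through keeps memory use flat regardless of response length; `Form()` additionally requires `python-multipart` to be installed.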
diff --git a/app/services/llm.py b/app/services/llm.py
index db29c28..2b28ad7 100644
--- a/app/services/llm.py
+++ b/app/services/llm.py
@@ -10,7 +10,7 @@ class StreamLLMService:
 
     async def stream_chat(self, prompt: str) -> AsyncGenerator[bytes, None]:
         """Stream chat completion responses from LLM."""
-        # Send user message first
+        # Send the user message first
         user_msg = {
             "role": "user",
             "content": prompt,
diff --git a/tests/chat.py b/tests/chat.py
index 25bfa2f..a231c6c 100644
--- a/tests/chat.py
+++ b/tests/chat.py
@@ -1,53 +1,30 @@
-from typing import Optional, AsyncGenerator
-
+import anyio
 import httpx
 import orjson
 
 
+async def chat_with_endpoint():
+    async with httpx.AsyncClient() as client:
+        while True:
+            # Get user input
+            prompt = input("\nYou: ")
+            if prompt.lower() == "exit":
+                break
 
-class StreamLLMService:
-    def __init__(self, base_url: str = "http://localhost:11434/v1"):
-        self.base_url = base_url
-        self.model = "llama3.2"
-
-    async def stream_chat(self, prompt: str) -> AsyncGenerator[bytes, None]:
-        """Stream chat completion responses from LLM."""
-        # Send user message first
-        user_msg = {
-            "role": "user",
-            "content": prompt,
-        }
-        yield orjson.dumps(user_msg) + b"\n"
-
-        # Open client as context manager and stream responses
-        async with httpx.AsyncClient(base_url=self.base_url) as client:
+            # Send request to the API
+            print("\nModel: ", end="", flush=True)
             async with client.stream(
                 "POST",
-                "/chat/completions",
-                json={
-                    "model": self.model,
-                    "messages": [{"role": "user", "content": prompt}],
-                    "stream": True,
-                },
-                timeout=60.0,
+                "http://localhost:8000/chat/",
+                data={"prompt": prompt},
+                timeout=60
             ) as response:
-                async for line in response.aiter_lines():
-                    print(line)
-                    if line.startswith("data: ") and line != "data: [DONE]":
+                async for chunk in response.aiter_lines():
+                    if chunk:
                         try:
-                            json_line = line[6:]  # Remove "data: " prefix
-                            data = orjson.loads(json_line)
-                            content = (
-                                data.get("choices", [{}])[0]
-                                .get("delta", {})
-                                .get("content", "")
-                            )
-                            if content:
-                                model_msg = {"role": "model", "content": content}
-                                yield orjson.dumps(model_msg) + b"\n"
-                        except Exception:
-                            pass
-
-
-# FastAPI dependency
-def get_llm_service(base_url: Optional[str] = None) -> StreamLLMService:
-    return StreamLLMService(base_url=base_url or "http://localhost:11434/v1")
+
+
+if __name__ == "__main__":
+    anyio.run(chat_with_endpoint)
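The new `tests/chat.py` is an interactive script rather than something pytest will collect. Below is a hedged sketch of an automated counterpart; it assumes the FastAPI instance is importable as `app.main:app`, that the anyio pytest plugin is configured, that the route lives at the README's `/v1/ml/chat/` path, and, like the interactive script, that the Ollama model from the README is running, since the service streams from it.

```python
# Hypothetical automated counterpart to tests/chat.py -- assumptions noted above.
import httpx
import orjson
import pytest

from app.main import app  # assumed import path for the FastAPI instance


@pytest.mark.anyio
async def test_chat_streams_user_message_first():
    # Drive the ASGI app in-process instead of hitting http://localhost:8000.
    transport = httpx.ASGITransport(app=app)
    async with httpx.AsyncClient(transport=transport, base_url="http://test") as client:
        async with client.stream(
            "POST", "/v1/ml/chat/", data={"prompt": "Hello"}, timeout=60
        ) as response:
            assert response.status_code == 200
            chunks = [orjson.loads(line) async for line in response.aiter_lines() if line]

    # The service yields the user message as the first NDJSON chunk,
    # then streams model chunks of the form {"role": "model", "content": ...}.
    assert chunks[0] == {"role": "user", "content": "Hello"}
```

Because this still depends on a live model, a CI setup may prefer to stub the upstream LLM call instead.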