mirror of
https://github.com/Balshgit/gpt_chat_bot.git
synced 2026-02-03 11:40:39 +03:00
add testing database and chatgpt factories (#28)
* add testing database and chatgpt factories * include lint job to develop stage * reformat audioconverter save files to tmp directory * add api tests * update README.md
This commit is contained in:
@@ -32,8 +32,10 @@ async def about_me(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
|
||||
async def about_bot(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
|
||||
if not update.effective_message:
|
||||
return None
|
||||
chat_gpt_service = ChatGptService.build()
|
||||
model = await chat_gpt_service.get_current_chatgpt_model()
|
||||
await update.effective_message.reply_text(
|
||||
f"Бот использует бесплатную модель {settings.GPT_MODEL} для ответов на вопросы. "
|
||||
f"Бот использует бесплатную модель {model} для ответов на вопросы. "
|
||||
f"\nПринимает запросы на разных языках.\n\nБот так же умеет переводить русские голосовые сообщения в текст. "
|
||||
f"Просто пришлите голосовуху и получите поток сознания в виде текста, но без знаков препинания",
|
||||
parse_mode="Markdown",
|
||||
@@ -87,9 +89,9 @@ async def voice_recognize(update: Update, context: ContextTypes.DEFAULT_TYPE) ->
|
||||
|
||||
logger.info("file has been saved", filename=tmpfile.name)
|
||||
|
||||
speech_to_text_service = SpeechToTextService()
|
||||
speech_to_text_service = SpeechToTextService(filename=tmpfile.name)
|
||||
|
||||
speech_to_text_service.get_text_from_audio(filename=tmpfile.name)
|
||||
speech_to_text_service.get_text_from_audio()
|
||||
|
||||
part = 0
|
||||
while speech_to_text_service.text_parts or not speech_to_text_service.text_recognised:
|
||||
|
||||
@@ -28,14 +28,15 @@ class ChatGPTRepository:
|
||||
return result.scalars().all()
|
||||
|
||||
async def change_chatgpt_model_priority(self, model_id: int, priority: int) -> None:
|
||||
current_model = await self.get_current_chatgpt_model()
|
||||
|
||||
reset_priority_query = update(ChatGpt).values(priority=0).filter(ChatGpt.model == current_model)
|
||||
set_new_priority_query = update(ChatGpt).values(priority=priority).filter(ChatGpt.model == model_id)
|
||||
|
||||
query = update(ChatGpt).values(priority=priority).filter(ChatGpt.id == model_id)
|
||||
async with self.db.get_transaction_session() as session:
|
||||
await session.execute(reset_priority_query)
|
||||
await session.execute(set_new_priority_query)
|
||||
await session.execute(query)
|
||||
|
||||
async def reset_all_chatgpt_models_priority(self) -> None:
    """Set ``priority`` to 0 on ``ChatGpt`` rows; the statement carries no filter clause."""
    zero_all_priorities = update(ChatGpt).values(priority=0)

    async with self.db.session() as db_session:
        await db_session.execute(zero_all_priorities)
|
||||
|
||||
async def add_chatgpt_model(self, model: str, priority: int) -> dict[str, str | int]:
|
||||
query = (
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import os
|
||||
import subprocess # noqa
|
||||
import tempfile
|
||||
from concurrent.futures.thread import ThreadPoolExecutor
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Sequence
|
||||
@@ -21,22 +22,23 @@ from settings.config import settings
|
||||
|
||||
|
||||
class SpeechToTextService:
|
||||
def __init__(self) -> None:
|
||||
def __init__(self, filename: str) -> None:
    """Prepare the recognition state for one audio file.

    :param filename: path of the source audio file; stored on the instance.
    """
    speech_recognizer = Recognizer()
    speech_recognizer.energy_threshold = 50

    # Result state: recognized text keyed by segment index, plus a completion flag.
    self.text_recognised = False
    self.text_parts: dict[int, str] = {}

    self.recognizer = speech_recognizer
    self.executor = ThreadPoolExecutor()
    self.filename = filename
|
||||
|
||||
def get_text_from_audio(self, filename: str) -> None:
    """Submit ``self.worker`` to the executor for ``filename``; the returned future is discarded.

    :param filename: path forwarded to ``self.worker`` as the ``filename`` keyword.
    """
    background_job = self.worker
    self.executor.submit(background_job, filename=filename)
|
||||
def get_text_from_audio(self) -> None:
    """Submit ``self._worker`` to the executor; the returned future is discarded."""
    background_job = self._worker
    self.executor.submit(background_job)
|
||||
|
||||
def worker(self, filename: str) -> Any:
    """Run both pipeline stages for ``filename`` in order: WAV conversion, then recognition.

    :param filename: path handed positionally to each stage.
    :return: ``None``; the stages' return values are not used.
    """
    pipeline = (self._convert_file_to_wav, self._convert_audio_to_text)
    for stage in pipeline:
        stage(filename)
|
||||
def _worker(self) -> Any:
    """Run both pipeline stages in order: WAV conversion, then recognition.

    :return: ``None``; the stages' return values are not used.
    """
    pipeline = (self._convert_file_to_wav, self._convert_audio_to_text)
    for stage in pipeline:
        stage()
|
||||
|
||||
def _convert_audio_to_text(self, filename: str) -> None:
|
||||
wav_filename = f"{filename}.wav"
|
||||
def _convert_audio_to_text(self) -> None:
|
||||
wav_filename = f"{self.filename}.wav"
|
||||
|
||||
speech = AudioSegment.from_wav(wav_filename)
|
||||
speech_duration = len(speech)
|
||||
@@ -51,40 +53,38 @@ class SpeechToTextService:
|
||||
sound_segment = speech[i * AUDIO_SEGMENT_DURATION - 250 : i * AUDIO_SEGMENT_DURATION + ending]
|
||||
else:
|
||||
sound_segment = speech[i * AUDIO_SEGMENT_DURATION - 250 : (i + 1) * AUDIO_SEGMENT_DURATION]
|
||||
self.text_parts[i] = self._recognize_by_google(wav_filename, sound_segment)
|
||||
self.text_parts[i] = self._recognize_by_google(sound_segment)
|
||||
|
||||
self.text_recognised = True
|
||||
|
||||
# clean temp voice message main files
|
||||
try:
|
||||
os.remove(wav_filename)
|
||||
os.remove(filename)
|
||||
os.remove(self.filename)
|
||||
except FileNotFoundError as error:
|
||||
logger.error("error temps files not deleted", error=error, filenames=[filename, wav_filename])
|
||||
logger.error("error temps files not deleted", error=error, filenames=[self.filename, wav_filename])
|
||||
|
||||
@staticmethod
|
||||
def _convert_file_to_wav(filename: str) -> None:
|
||||
new_filename = filename + ".wav"
|
||||
cmd = ["ffmpeg", "-loglevel", "quiet", "-i", filename, "-vn", new_filename]
|
||||
def _convert_file_to_wav(self) -> None:
|
||||
new_filename = self.filename + ".wav"
|
||||
cmd = ["ffmpeg", "-loglevel", "quiet", "-i", self.filename, "-vn", new_filename]
|
||||
try:
|
||||
subprocess.run(args=cmd) # noqa: S603
|
||||
logger.info("file has been converted to wav", filename=new_filename)
|
||||
except Exception as error:
|
||||
logger.error("cant convert voice", error=error, filename=filename)
|
||||
logger.error("cant convert voice", error=error, filename=self.filename)
|
||||
|
||||
def _recognize_by_google(self, filename: str, sound_segment: AudioSegment) -> str:
    """Recognize speech in one audio segment with ``recognize_google`` (language ``ru-RU``).

    The segment is exported as WAV to ``<filename>_tmp_part``, read back through
    ``AudioFile`` and handed to the recognizer. The temporary part file is removed
    on every exit path.

    :param filename: base path used to build the temporary part file name.
    :param sound_segment: audio slice to recognize.
    :return: the recognized text.
    :raises SpeechRecognizerError: re-raised after being logged.
    """
    tmp_filename = f"{filename}_tmp_part"
    try:
        sound_segment.export(tmp_filename, format="wav")
        with AudioFile(tmp_filename) as source:
            audio_text = self.recognizer.listen(source)
        return self.recognizer.recognize_google(audio_text, language="ru-RU")
    except SpeechRecognizerError as error:
        logger.error("error recognizing text with google", error=error)
        raise
    finally:
        # Single cleanup point: runs after the AudioFile handle is closed, on success
        # and on any failure, so the part file is not left behind.
        if os.path.exists(tmp_filename):
            os.remove(tmp_filename)
|
||||
def _recognize_by_google(self, sound_segment: AudioSegment) -> str:
    """Recognize speech in one audio segment with ``recognize_google`` (language ``ru-RU``).

    The segment is exported as WAV into a named temporary file, read back through
    ``AudioFile`` and handed to the recognizer.

    :param sound_segment: audio slice to recognize.
    :return: the recognized text.
    :raises SpeechRecognizerError: re-raised after being logged.
    """
    # delete=True: the temporary file is removed when this ``with`` block exits,
    # on the return path and on the raise path alike.
    with tempfile.NamedTemporaryFile(delete=True) as tmpfile:
        # export() writes the complete WAV container (header and frames) into the
        # handle, so no separate write of the raw PCM data is needed.
        sound_segment.export(tmpfile, format="wav")
        with AudioFile(tmpfile) as source:
            audio_text = self.recognizer.listen(source)
        try:
            return self.recognizer.recognize_google(audio_text, language="ru-RU")
        except SpeechRecognizerError as error:
            logger.error("error recognizing text with google", error=error)
            raise
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -109,6 +109,9 @@ class ChatGptService:
|
||||
async def change_chatgpt_model_priority(self, model_id: int, priority: int) -> None:
    """Delegate a priority change for one model to the repository.

    :param model_id: forwarded as the ``model_id`` keyword.
    :param priority: forwarded as the ``priority`` keyword.
    :return: the result of the awaited repository call.
    """
    update_kwargs = {"model_id": model_id, "priority": priority}
    outcome = await self.repository.change_chatgpt_model_priority(**update_kwargs)
    return outcome
|
||||
|
||||
async def reset_all_chatgpt_models_priority(self) -> None:
    """Delegate the reset of all model priorities to the repository.

    :return: the result of the awaited repository call.
    """
    outcome = await self.repository.reset_all_chatgpt_models_priority()
    return outcome
|
||||
|
||||
async def add_chatgpt_model(self, gpt_model: str, priority: int) -> dict[str, str | int]:
|
||||
return await self.repository.add_chatgpt_model(model=gpt_model, priority=priority)
|
||||
|
||||
|
||||
@@ -57,7 +57,7 @@ def _setup_db(app: FastAPI, settings: AppSettings) -> None:
|
||||
:param app: fastAPI application.
|
||||
"""
|
||||
engine = create_async_engine(
|
||||
str(settings.db_url),
|
||||
str(settings.async_db_url),
|
||||
echo=settings.DB_ECHO,
|
||||
execution_options={"isolation_level": "AUTOCOMMIT"},
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user