From aaa7b5c57d133f25ec223a31075de48b289bb64c Mon Sep 17 00:00:00 2001 From: Dmitry Afanasyev Date: Sun, 28 Aug 2022 01:33:35 +0300 Subject: [PATCH] refactor parse web --- app/core/bot.py | 17 +++++----- app/core/parse_web.py | 73 ++++++++++++++++++++++--------------------- app/main.py | 2 +- tests/bot/test_bot.py | 2 +- 4 files changed, 47 insertions(+), 47 deletions(-) diff --git a/app/core/bot.py b/app/core/bot.py index 0af69a5..f894a24 100644 --- a/app/core/bot.py +++ b/app/core/bot.py @@ -4,7 +4,7 @@ from aiogram import Bot, types from aiogram.contrib.middlewares.logging import LoggingMiddleware from aiogram.dispatcher import Dispatcher from aiogram.utils.callback_data import CallbackData -from app.core.parse_web import get_driver, parse_site +from app.core.parse_web import get_driver, parse_yandex_maps from app.settings import TELEGRAM_API_TOKEN bot = Bot(token=TELEGRAM_API_TOKEN) @@ -37,10 +37,9 @@ async def home_office( query: types.CallbackQuery, callback_data: dict[str, str] ) -> types.Message: driver = get_driver() - text = parse_site( + text = parse_yandex_maps( driver=driver, - url='https://yandex.ru/maps/213/moscow/stops/stop__9640740/' - '?l=masstransit&ll=37.527754%2C55.823507&tab=overview&z=21', + url='https://yandex.ru/maps/213/moscow/stops/stop__9640740/?ll=37.527924%2C55.823470&tab=overview&z=21', message='Остановка Б. Академическая ул, д. 15', ) @@ -54,10 +53,9 @@ async def office_home( query: types.CallbackQuery, callback_data: dict[str, str] ) -> types.Message: driver = get_driver() - text = parse_site( + text = parse_yandex_maps( driver=driver, - url='https://yandex.ru/maps/213/moscow/stops/stop__9640288/?' - 'l=masstransit&ll=37.505338%2C55.800160&tab=overview&z=211', + url='https://yandex.ru/maps/213/moscow/stops/stop__9640288/?ll=37.505402%2C55.800214&tab=overview&z=21', message='Остановка Улица Алабяна', ) @@ -80,10 +78,9 @@ async def echo(message: types.Message) -> types.Message: async def morning_bus_mailing(chat_ids: list[int]) -> None: driver = get_driver() - text = parse_site( + text = parse_yandex_maps( driver=driver, - url='https://yandex.ru/maps/213/moscow/stops/stop__9640740/' - '?l=masstransit&ll=37.527754%2C55.823507&tab=overview&z=21', + url='https://yandex.ru/maps/213/moscow/stops/stop__9640740/?ll=37.527924%2C55.823470&tab=overview&z=21', message='Остановка Б. Академическая ул, д. 15', ) await asyncio.gather( diff --git a/app/core/parse_web.py b/app/core/parse_web.py index b07c516..6beaa48 100644 --- a/app/core/parse_web.py +++ b/app/core/parse_web.py @@ -1,6 +1,7 @@ import os import tarfile import time +from collections import defaultdict from pathlib import Path import wget @@ -51,51 +52,53 @@ def configure_firefox_driver(private_window: bool = False) -> WebDriver | None: return None -def parse_site(url: str, message: str, driver: RemoteWebDriver | None = None) -> str: +def parse_yandex_maps( + url: str, message: str, driver: RemoteWebDriver | None = None +) -> str: if not driver: logger.error('Driver is not configured') return 'Что-то пошло не так. :( Драйвер Firefox не сконфигурирован.' + driver.get(url) time.sleep(1) - bus_300, bus_t19 = None, None - bus_300_arrival, bus_t19_arrival = None, None + bus_arrival: dict[str, str | None] = defaultdict(str) - elements = driver.find_elements( - by='class name', value='masstransit-brief-schedule-view' - ) + try: + web_elements = driver.find_elements( + by='class name', value='masstransit-vehicle-snippet-view' + ) + for web_element in web_elements: + bus = web_element.find_element( + by='class name', value='masstransit-vehicle-snippet-view__main-text' + ) + if bus: + bus_arrival_time = web_element.find_element( + by='class name', + value='masstransit-prognoses-view__title-text', + ) + match bus.text: + case "300": + bus_arrival["Автобус 300"] = ( + bus_arrival_time.text if bus_arrival_time else None + ) + case "т19": + bus_arrival["Автобус Т19"] = ( + bus_arrival_time.text if bus_arrival_time else None + ) + except NoSuchElementException: + pass + except StaleElementReferenceException: + pass - for element in elements: - try: - bus_300 = element.find_element( - by='css selector', value='[aria-label="300"]' - ) - bus_300_arrival = element.find_element( - by='class name', value='masstransit-prognoses-view__title-text' - ) - bus_t19 = element.find_element( - by='css selector', value='[aria-label="т19"]' - ) - bus_t19_arrival = element.find_element( - by='class name', value='masstransit-prognoses-view__title-text' - ) - except NoSuchElementException: - pass - except StaleElementReferenceException: - pass - no_bus_at_all = True - answer = f'{message}\n\n' - if bus_300 and bus_300_arrival: - answer += f'Автобус {bus_300.text} - {bus_300_arrival.text}\n' - no_bus_at_all = False - if bus_t19 and bus_t19_arrival: - answer += f'Автобус {bus_t19.text} - {bus_t19_arrival.text}' - no_bus_at_all = False - if not no_bus_at_all: - return answer - if no_bus_at_all: + if not any(bus_arrival.values()): return 'Автобусов 300 или Т19 не найдено. \n\nСмотри на карте :)' + answer = f'{message}\n\n' + for bus_name, arrival_time in bus_arrival.items(): + answer += f'{bus_name} - {arrival_time}\n' + return answer + @timed_cache(seconds=DRIVER_SESSION_TTL) def get_driver() -> RemoteWebDriver: diff --git a/app/main.py b/app/main.py index 5b3568f..2b259c8 100644 --- a/app/main.py +++ b/app/main.py @@ -22,7 +22,7 @@ from app.settings import ( WEBHOOK_URL, ) -queue = asyncio.Queue() # type: ignore +queue: asyncio.Queue = asyncio.Queue() # type: ignore async def on_startup(dp: Dispatcher) -> None: diff --git a/tests/bot/test_bot.py b/tests/bot/test_bot.py index 8ea64f2..b7804a3 100644 --- a/tests/bot/test_bot.py +++ b/tests/bot/test_bot.py @@ -13,7 +13,7 @@ pytestmark = [ ] -async def test_parse_site(bot: Bot) -> None: +async def test_parse_yandex_maps(bot: Bot) -> None: tg_user = UserFactory().as_dict() user = types.User(**tg_user)