From 0c326fd12d552ed0a4df776997275e0aa2027e02 Mon Sep 17 00:00:00 2001 From: Dmitry Afanasyev Date: Sun, 28 Aug 2022 15:02:18 +0300 Subject: [PATCH] reworked web_parser to class --- app/core/bot.py | 15 ++-- app/core/parse_web.py | 170 +++++++++++++++++++++--------------------- tests/bot/test_bot.py | 7 +- 3 files changed, 97 insertions(+), 95 deletions(-) diff --git a/app/core/bot.py b/app/core/bot.py index c9017ca..89346f2 100644 --- a/app/core/bot.py +++ b/app/core/bot.py @@ -5,7 +5,7 @@ from aiogram import Bot, types from aiogram.contrib.middlewares.logging import LoggingMiddleware from aiogram.dispatcher import Dispatcher from aiogram.utils.callback_data import CallbackData -from app.core.parse_web import get_driver, parse_yandex_maps +from app.core.parse_web import WebParser from app.settings import TELEGRAM_API_TOKEN @@ -14,7 +14,6 @@ class TransportBot: bot: Bot = Bot(TELEGRAM_API_TOKEN) dispatcher: Dispatcher = Dispatcher(bot) dispatcher.middleware.setup(LoggingMiddleware()) - stations_cb: CallbackData = CallbackData('station', 'direction') @staticmethod @@ -46,8 +45,8 @@ class TransportBot: async def home_office( query: types.CallbackQuery, callback_data: dict[str, str] ) -> types.Message: - driver = get_driver() - text = parse_yandex_maps( + driver = WebParser.get_driver() + text = WebParser.parse_yandex_maps( driver=driver, url='https://yandex.ru/maps/213/moscow/stops/stop__9640740/?ll=37.527924%2C55.823470&tab=overview&z=21', message='Остановка Б. Академическая ул, д. 15', @@ -62,8 +61,8 @@ class TransportBot: async def office_home( query: types.CallbackQuery, callback_data: dict[str, str] ) -> types.Message: - driver = get_driver() - text = parse_yandex_maps( + driver = WebParser.get_driver() + text = WebParser.parse_yandex_maps( driver=driver, url='https://yandex.ru/maps/213/moscow/stops/stop__9640288/?ll=37.505402%2C55.800214&tab=overview&z=21', message='Остановка Улица Алабяна', @@ -87,8 +86,8 @@ class TransportBot: if not chat_ids: return None - driver = get_driver() - text = parse_yandex_maps( + driver = WebParser.get_driver() + text = WebParser.parse_yandex_maps( driver=driver, url='https://yandex.ru/maps/213/moscow/stops/stop__9640740/?ll=37.527924%2C55.823470&tab=overview&z=21', message='Остановка Б. Академическая ул, д. 15', diff --git a/app/core/parse_web.py b/app/core/parse_web.py index 6beaa48..55e2a6a 100644 --- a/app/core/parse_web.py +++ b/app/core/parse_web.py @@ -18,93 +18,97 @@ from selenium.webdriver.firefox.service import Service from selenium.webdriver.firefox.webdriver import RemoteWebDriver, WebDriver -def download_gecko_driver() -> None: - gecko_driver = ( - f'https://github.com/mozilla/geckodriver/releases/download/v{GECKO_DRIVER_VERSION}/' - f'geckodriver-v{GECKO_DRIVER_VERSION}-linux64.tar.gz' - ) - - if not Path(BASE_DIR / 'geckodriver').exists(): - logger.info(f'Downloading gecodriver v {GECKO_DRIVER_VERSION}...') - geckodriver_file = wget.download( - url=gecko_driver, out=BASE_DIR.resolve().as_posix() +class WebParser: + @staticmethod + def download_gecko_driver() -> None: + gecko_driver_url = ( + f'https://github.com/mozilla/geckodriver/releases/download/v{GECKO_DRIVER_VERSION}/' + f'geckodriver-v{GECKO_DRIVER_VERSION}-linux64.tar.gz' ) - with tarfile.open(geckodriver_file) as tar: - tar.extractall(BASE_DIR) - os.remove(f'{BASE_DIR / "geckodriver"}-v{GECKO_DRIVER_VERSION}-linux64.tar.gz') - logger.info(f'\ngeckodriver has been downloaded to folder {BASE_DIR}') - - -def configure_firefox_driver(private_window: bool = False) -> WebDriver | None: - opt = options.Options() - opt.headless = True - opt.add_argument('-profile') - opt.add_argument(f'{Path.home()}/snap/firefox/common/.mozilla/firefox') - if private_window: - opt.set_preference("browser.privatebrowsing.autostart", True) - service = Service(executable_path=(BASE_DIR / 'geckodriver').as_posix()) - try: - firefox_driver = webdriver.Firefox(service=service, options=opt) - return firefox_driver - except WebDriverException: - logger.error('Error configuring webdriver. Possible it already configured') - return None - - -def parse_yandex_maps( - url: str, message: str, driver: RemoteWebDriver | None = None -) -> str: - if not driver: - logger.error('Driver is not configured') - return 'Что-то пошло не так. :( Драйвер Firefox не сконфигурирован.' - - driver.get(url) - time.sleep(1) - - bus_arrival: dict[str, str | None] = defaultdict(str) - - try: - web_elements = driver.find_elements( - by='class name', value='masstransit-vehicle-snippet-view' - ) - for web_element in web_elements: - bus = web_element.find_element( - by='class name', value='masstransit-vehicle-snippet-view__main-text' + if not Path(BASE_DIR / 'geckodriver').exists(): + logger.info(f'Downloading gecodriver v {GECKO_DRIVER_VERSION}...') + geckodriver_file = wget.download( + url=gecko_driver_url, out=BASE_DIR.resolve().as_posix() ) - if bus: - bus_arrival_time = web_element.find_element( - by='class name', - value='masstransit-prognoses-view__title-text', + + with tarfile.open(geckodriver_file) as tar: + tar.extractall(BASE_DIR) + os.remove( + f'{BASE_DIR / "geckodriver"}-v{GECKO_DRIVER_VERSION}-linux64.tar.gz' + ) + logger.info(f'\ngeckodriver has been downloaded to folder {BASE_DIR}') + + @staticmethod + def configure_firefox_driver(private_window: bool = False) -> WebDriver | None: + opt = options.Options() + opt.headless = True + opt.add_argument('-profile') + opt.add_argument(f'{Path.home()}/snap/firefox/common/.mozilla/firefox') + if private_window: + opt.set_preference("browser.privatebrowsing.autostart", True) + service = Service(executable_path=(BASE_DIR / 'geckodriver').as_posix()) + try: + firefox_driver = webdriver.Firefox(service=service, options=opt) + return firefox_driver + except WebDriverException: + logger.error('Error configuring webdriver. Possible it already configured') + return None + + @staticmethod + def parse_yandex_maps( + url: str, message: str, driver: RemoteWebDriver | None = None + ) -> str: + if not driver: + logger.error('Driver is not configured') + return 'Что-то пошло не так. :( Драйвер Firefox не сконфигурирован.' + + driver.get(url) + time.sleep(1) + + bus_arrival: dict[str, str | None] = defaultdict(str) + + try: + web_elements = driver.find_elements( + by='class name', value='masstransit-vehicle-snippet-view' + ) + for web_element in web_elements: + bus = web_element.find_element( + by='class name', value='masstransit-vehicle-snippet-view__main-text' ) - match bus.text: - case "300": - bus_arrival["Автобус 300"] = ( - bus_arrival_time.text if bus_arrival_time else None - ) - case "т19": - bus_arrival["Автобус Т19"] = ( - bus_arrival_time.text if bus_arrival_time else None - ) - except NoSuchElementException: - pass - except StaleElementReferenceException: - pass + if bus: + bus_arrival_time = web_element.find_element( + by='class name', + value='masstransit-prognoses-view__title-text', + ) + match bus.text: + case "300": + bus_arrival["Автобус 300"] = ( + bus_arrival_time.text if bus_arrival_time else None + ) + case "т19": + bus_arrival["Автобус Т19"] = ( + bus_arrival_time.text if bus_arrival_time else None + ) + except NoSuchElementException: + pass + except StaleElementReferenceException: + pass - if not any(bus_arrival.values()): - return 'Автобусов 300 или Т19 не найдено. \n\nСмотри на карте :)' + if not any(bus_arrival.values()): + return 'Автобусов 300 или Т19 не найдено. \n\nСмотри на карте :)' - answer = f'{message}\n\n' - for bus_name, arrival_time in bus_arrival.items(): - answer += f'{bus_name} - {arrival_time}\n' - return answer + answer = f'{message}\n\n' + for bus_name, arrival_time in bus_arrival.items(): + answer += f'{bus_name} - {arrival_time}\n' + return answer - -@timed_cache(seconds=DRIVER_SESSION_TTL) -def get_driver() -> RemoteWebDriver: - opt = options.Options() - opt.headless = True - driver = RemoteWebDriver( - command_executor='http://selenoid_host:4444/wd/hub', options=opt - ) - return driver + @staticmethod + @timed_cache(seconds=DRIVER_SESSION_TTL) + def get_driver() -> RemoteWebDriver: + opt = options.Options() + opt.headless = True + driver = RemoteWebDriver( + command_executor='http://selenoid_host:4444/wd/hub', options=opt + ) + return driver diff --git a/tests/bot/test_bot.py b/tests/bot/test_bot.py index b7804a3..88b1ee9 100644 --- a/tests/bot/test_bot.py +++ b/tests/bot/test_bot.py @@ -4,7 +4,7 @@ import pytest from aiogram import Bot, Dispatcher, types from aiogram.dispatcher.filters.builtin import Command from aiogram.types import Update -from app.core.bot import dispatcher +from app.core.bot import TransportBot from tests.conftest import FakeTelegram from tests.data.factories import UserFactory @@ -25,8 +25,8 @@ async def test_parse_yandex_maps(bot: Bot) -> None: async def test_command1(bot: Bot) -> None: - dispatcher.bot = bot - handlers = dispatcher.message_handlers.handlers + TransportBot.dispatcher.bot = bot + handlers = TransportBot.dispatcher.message_handlers.handlers for handler in handlers: handl = list( filter(lambda obj: isinstance(obj.filter, Command), handler.filters) @@ -66,5 +66,4 @@ async def test_update(dispatcher_fixture: Dispatcher, bot: Bot) -> None: update = Update(**data) dispatcher_fixture.message_handler() await dispatcher_fixture.process_update(update) - assert True