Reworked web_parser into a class

This commit is contained in:
Dmitry Afanasyev 2022-08-28 15:02:18 +03:00
parent 591d5ea01a
commit 0c326fd12d
3 changed files with 97 additions and 95 deletions

View File

@@ -5,7 +5,7 @@ from aiogram import Bot, types
from aiogram.contrib.middlewares.logging import LoggingMiddleware from aiogram.contrib.middlewares.logging import LoggingMiddleware
from aiogram.dispatcher import Dispatcher from aiogram.dispatcher import Dispatcher
from aiogram.utils.callback_data import CallbackData from aiogram.utils.callback_data import CallbackData
from app.core.parse_web import get_driver, parse_yandex_maps from app.core.parse_web import WebParser
from app.settings import TELEGRAM_API_TOKEN from app.settings import TELEGRAM_API_TOKEN
@@ -14,7 +14,6 @@ class TransportBot:
bot: Bot = Bot(TELEGRAM_API_TOKEN) bot: Bot = Bot(TELEGRAM_API_TOKEN)
dispatcher: Dispatcher = Dispatcher(bot) dispatcher: Dispatcher = Dispatcher(bot)
dispatcher.middleware.setup(LoggingMiddleware()) dispatcher.middleware.setup(LoggingMiddleware())
stations_cb: CallbackData = CallbackData('station', 'direction') stations_cb: CallbackData = CallbackData('station', 'direction')
@staticmethod @staticmethod
@@ -46,8 +45,8 @@ class TransportBot:
async def home_office( async def home_office(
query: types.CallbackQuery, callback_data: dict[str, str] query: types.CallbackQuery, callback_data: dict[str, str]
) -> types.Message: ) -> types.Message:
driver = get_driver() driver = WebParser.get_driver()
text = parse_yandex_maps( text = WebParser.parse_yandex_maps(
driver=driver, driver=driver,
url='https://yandex.ru/maps/213/moscow/stops/stop__9640740/?ll=37.527924%2C55.823470&tab=overview&z=21', url='https://yandex.ru/maps/213/moscow/stops/stop__9640740/?ll=37.527924%2C55.823470&tab=overview&z=21',
message='Остановка Б. Академическая ул, д. 15', message='Остановка Б. Академическая ул, д. 15',
@@ -62,8 +61,8 @@ class TransportBot:
async def office_home( async def office_home(
query: types.CallbackQuery, callback_data: dict[str, str] query: types.CallbackQuery, callback_data: dict[str, str]
) -> types.Message: ) -> types.Message:
driver = get_driver() driver = WebParser.get_driver()
text = parse_yandex_maps( text = WebParser.parse_yandex_maps(
driver=driver, driver=driver,
url='https://yandex.ru/maps/213/moscow/stops/stop__9640288/?ll=37.505402%2C55.800214&tab=overview&z=21', url='https://yandex.ru/maps/213/moscow/stops/stop__9640288/?ll=37.505402%2C55.800214&tab=overview&z=21',
message='Остановка Улица Алабяна', message='Остановка Улица Алабяна',
@@ -87,8 +86,8 @@ class TransportBot:
if not chat_ids: if not chat_ids:
return None return None
driver = get_driver() driver = WebParser.get_driver()
text = parse_yandex_maps( text = WebParser.parse_yandex_maps(
driver=driver, driver=driver,
url='https://yandex.ru/maps/213/moscow/stops/stop__9640740/?ll=37.527924%2C55.823470&tab=overview&z=21', url='https://yandex.ru/maps/213/moscow/stops/stop__9640740/?ll=37.527924%2C55.823470&tab=overview&z=21',
message='Остановка Б. Академическая ул, д. 15', message='Остановка Б. Академическая ул, д. 15',

View File

@@ -18,93 +18,97 @@ from selenium.webdriver.firefox.service import Service
from selenium.webdriver.firefox.webdriver import RemoteWebDriver, WebDriver from selenium.webdriver.firefox.webdriver import RemoteWebDriver, WebDriver
def download_gecko_driver() -> None: class WebParser:
gecko_driver = ( @staticmethod
f'https://github.com/mozilla/geckodriver/releases/download/v{GECKO_DRIVER_VERSION}/' def download_gecko_driver() -> None:
f'geckodriver-v{GECKO_DRIVER_VERSION}-linux64.tar.gz' gecko_driver_url = (
) f'https://github.com/mozilla/geckodriver/releases/download/v{GECKO_DRIVER_VERSION}/'
f'geckodriver-v{GECKO_DRIVER_VERSION}-linux64.tar.gz'
if not Path(BASE_DIR / 'geckodriver').exists():
logger.info(f'Downloading gecodriver v {GECKO_DRIVER_VERSION}...')
geckodriver_file = wget.download(
url=gecko_driver, out=BASE_DIR.resolve().as_posix()
) )
with tarfile.open(geckodriver_file) as tar: if not Path(BASE_DIR / 'geckodriver').exists():
tar.extractall(BASE_DIR) logger.info(f'Downloading gecodriver v {GECKO_DRIVER_VERSION}...')
os.remove(f'{BASE_DIR / "geckodriver"}-v{GECKO_DRIVER_VERSION}-linux64.tar.gz') geckodriver_file = wget.download(
logger.info(f'\ngeckodriver has been downloaded to folder {BASE_DIR}') url=gecko_driver_url, out=BASE_DIR.resolve().as_posix()
def configure_firefox_driver(private_window: bool = False) -> WebDriver | None:
opt = options.Options()
opt.headless = True
opt.add_argument('-profile')
opt.add_argument(f'{Path.home()}/snap/firefox/common/.mozilla/firefox')
if private_window:
opt.set_preference("browser.privatebrowsing.autostart", True)
service = Service(executable_path=(BASE_DIR / 'geckodriver').as_posix())
try:
firefox_driver = webdriver.Firefox(service=service, options=opt)
return firefox_driver
except WebDriverException:
logger.error('Error configuring webdriver. Possible it already configured')
return None
def parse_yandex_maps(
url: str, message: str, driver: RemoteWebDriver | None = None
) -> str:
if not driver:
logger.error('Driver is not configured')
return 'Что-то пошло не так. :( Драйвер Firefox не сконфигурирован.'
driver.get(url)
time.sleep(1)
bus_arrival: dict[str, str | None] = defaultdict(str)
try:
web_elements = driver.find_elements(
by='class name', value='masstransit-vehicle-snippet-view'
)
for web_element in web_elements:
bus = web_element.find_element(
by='class name', value='masstransit-vehicle-snippet-view__main-text'
) )
if bus:
bus_arrival_time = web_element.find_element( with tarfile.open(geckodriver_file) as tar:
by='class name', tar.extractall(BASE_DIR)
value='masstransit-prognoses-view__title-text', os.remove(
f'{BASE_DIR / "geckodriver"}-v{GECKO_DRIVER_VERSION}-linux64.tar.gz'
)
logger.info(f'\ngeckodriver has been downloaded to folder {BASE_DIR}')
@staticmethod
def configure_firefox_driver(private_window: bool = False) -> WebDriver | None:
opt = options.Options()
opt.headless = True
opt.add_argument('-profile')
opt.add_argument(f'{Path.home()}/snap/firefox/common/.mozilla/firefox')
if private_window:
opt.set_preference("browser.privatebrowsing.autostart", True)
service = Service(executable_path=(BASE_DIR / 'geckodriver').as_posix())
try:
firefox_driver = webdriver.Firefox(service=service, options=opt)
return firefox_driver
except WebDriverException:
logger.error('Error configuring webdriver. Possible it already configured')
return None
@staticmethod
def parse_yandex_maps(
url: str, message: str, driver: RemoteWebDriver | None = None
) -> str:
if not driver:
logger.error('Driver is not configured')
return 'Что-то пошло не так. :( Драйвер Firefox не сконфигурирован.'
driver.get(url)
time.sleep(1)
bus_arrival: dict[str, str | None] = defaultdict(str)
try:
web_elements = driver.find_elements(
by='class name', value='masstransit-vehicle-snippet-view'
)
for web_element in web_elements:
bus = web_element.find_element(
by='class name', value='masstransit-vehicle-snippet-view__main-text'
) )
match bus.text: if bus:
case "300": bus_arrival_time = web_element.find_element(
bus_arrival["Автобус 300"] = ( by='class name',
bus_arrival_time.text if bus_arrival_time else None value='masstransit-prognoses-view__title-text',
) )
case "т19": match bus.text:
bus_arrival["Автобус Т19"] = ( case "300":
bus_arrival_time.text if bus_arrival_time else None bus_arrival["Автобус 300"] = (
) bus_arrival_time.text if bus_arrival_time else None
except NoSuchElementException: )
pass case "т19":
except StaleElementReferenceException: bus_arrival["Автобус Т19"] = (
pass bus_arrival_time.text if bus_arrival_time else None
)
except NoSuchElementException:
pass
except StaleElementReferenceException:
pass
if not any(bus_arrival.values()): if not any(bus_arrival.values()):
return 'Автобусов 300 или Т19 не найдено. \n\nСмотри на карте :)' return 'Автобусов 300 или Т19 не найдено. \n\nСмотри на карте :)'
answer = f'{message}\n\n' answer = f'{message}\n\n'
for bus_name, arrival_time in bus_arrival.items(): for bus_name, arrival_time in bus_arrival.items():
answer += f'{bus_name} - {arrival_time}\n' answer += f'{bus_name} - {arrival_time}\n'
return answer return answer
@staticmethod
@timed_cache(seconds=DRIVER_SESSION_TTL) @timed_cache(seconds=DRIVER_SESSION_TTL)
def get_driver() -> RemoteWebDriver: def get_driver() -> RemoteWebDriver:
opt = options.Options() opt = options.Options()
opt.headless = True opt.headless = True
driver = RemoteWebDriver( driver = RemoteWebDriver(
command_executor='http://selenoid_host:4444/wd/hub', options=opt command_executor='http://selenoid_host:4444/wd/hub', options=opt
) )
return driver return driver

View File

@@ -4,7 +4,7 @@ import pytest
from aiogram import Bot, Dispatcher, types from aiogram import Bot, Dispatcher, types
from aiogram.dispatcher.filters.builtin import Command from aiogram.dispatcher.filters.builtin import Command
from aiogram.types import Update from aiogram.types import Update
from app.core.bot import dispatcher from app.core.bot import TransportBot
from tests.conftest import FakeTelegram from tests.conftest import FakeTelegram
from tests.data.factories import UserFactory from tests.data.factories import UserFactory
@@ -25,8 +25,8 @@ async def test_parse_yandex_maps(bot: Bot) -> None:
async def test_command1(bot: Bot) -> None: async def test_command1(bot: Bot) -> None:
dispatcher.bot = bot TransportBot.dispatcher.bot = bot
handlers = dispatcher.message_handlers.handlers handlers = TransportBot.dispatcher.message_handlers.handlers
for handler in handlers: for handler in handlers:
handl = list( handl = list(
filter(lambda obj: isinstance(obj.filter, Command), handler.filters) filter(lambda obj: isinstance(obj.filter, Command), handler.filters)
@@ -66,5 +66,4 @@ async def test_update(dispatcher_fixture: Dispatcher, bot: Bot) -> None:
update = Update(**data) update = Update(**data)
dispatcher_fixture.message_handler() dispatcher_fixture.message_handler()
await dispatcher_fixture.process_update(update) await dispatcher_fixture.process_update(update)
assert True assert True