refactor parse web

This commit is contained in:
Dmitry Afanasyev 2022-08-28 01:33:35 +03:00
parent 97f22d7fb2
commit aaa7b5c57d
4 changed files with 47 additions and 47 deletions

View File

@ -4,7 +4,7 @@ from aiogram import Bot, types
from aiogram.contrib.middlewares.logging import LoggingMiddleware
from aiogram.dispatcher import Dispatcher
from aiogram.utils.callback_data import CallbackData
from app.core.parse_web import get_driver, parse_site
from app.core.parse_web import get_driver, parse_yandex_maps
from app.settings import TELEGRAM_API_TOKEN
bot = Bot(token=TELEGRAM_API_TOKEN)
@ -37,10 +37,9 @@ async def home_office(
query: types.CallbackQuery, callback_data: dict[str, str]
) -> types.Message:
driver = get_driver()
text = parse_site(
text = parse_yandex_maps(
driver=driver,
url='https://yandex.ru/maps/213/moscow/stops/stop__9640740/'
'?l=masstransit&ll=37.527754%2C55.823507&tab=overview&z=21',
url='https://yandex.ru/maps/213/moscow/stops/stop__9640740/?ll=37.527924%2C55.823470&tab=overview&z=21',
message='Остановка Б. Академическая ул, д. 15',
)
@ -54,10 +53,9 @@ async def office_home(
query: types.CallbackQuery, callback_data: dict[str, str]
) -> types.Message:
driver = get_driver()
text = parse_site(
text = parse_yandex_maps(
driver=driver,
url='https://yandex.ru/maps/213/moscow/stops/stop__9640288/?'
'l=masstransit&ll=37.505338%2C55.800160&tab=overview&z=211',
url='https://yandex.ru/maps/213/moscow/stops/stop__9640288/?ll=37.505402%2C55.800214&tab=overview&z=21',
message='Остановка Улица Алабяна',
)
@ -80,10 +78,9 @@ async def echo(message: types.Message) -> types.Message:
async def morning_bus_mailing(chat_ids: list[int]) -> None:
driver = get_driver()
text = parse_site(
text = parse_yandex_maps(
driver=driver,
url='https://yandex.ru/maps/213/moscow/stops/stop__9640740/'
'?l=masstransit&ll=37.527754%2C55.823507&tab=overview&z=21',
url='https://yandex.ru/maps/213/moscow/stops/stop__9640740/?ll=37.527924%2C55.823470&tab=overview&z=21',
message='Остановка Б. Академическая ул, д. 15',
)
await asyncio.gather(

View File

@ -1,6 +1,7 @@
import os
import tarfile
import time
from collections import defaultdict
from pathlib import Path
import wget
@ -51,51 +52,53 @@ def configure_firefox_driver(private_window: bool = False) -> WebDriver | None:
return None
def parse_yandex_maps(
    url: str, message: str, driver: RemoteWebDriver | None = None
) -> str:
    """Build a text summary of arrival forecasts for buses 300 and T19.

    Opens ``url`` with ``driver``, walks the vehicle snippets found on the
    page and collects the forecast text shown for the tracked routes.

    Args:
        url: Address of the Yandex Maps stop page to open.
        message: Header placed on the first line of the answer.
        driver: WebDriver used to load the page. When it is missing, an
            error text is returned instead of raising.

    Returns:
        ``message`` followed by one ``<bus> - <forecast>`` line for every
        tracked bus that has a non-empty forecast, or a fallback text when
        no tracked bus with a forecast was found.
    """
    if not driver:
        logger.error('Driver is not configured')
        return 'Что-то пошло не так. :( Драйвер Firefox не сконфигурирован.'

    driver.get(url)
    # NOTE(review): fixed pause, presumably to let the page render its
    # widgets; it blocks the calling thread for the whole second.
    time.sleep(1)

    # Route label as shown on the page -> name used in the answer.
    tracked_buses = {'300': 'Автобус 300', 'т19': 'Автобус Т19'}
    bus_arrival: dict[str, str] = {}

    try:
        snippets = driver.find_elements(
            by='class name', value='masstransit-vehicle-snippet-view'
        )
    except (NoSuchElementException, StaleElementReferenceException):
        snippets = []

    for snippet in snippets:
        # Each snippet is handled on its own so that one broken or stale
        # snippet does not discard the forecasts of the remaining ones.
        try:
            route = snippet.find_element(
                by='class name',
                value='masstransit-vehicle-snippet-view__main-text',
            ).text
            bus_name = tracked_buses.get(route)
            if bus_name is None:
                continue
            forecast = snippet.find_element(
                by='class name',
                value='masstransit-prognoses-view__title-text',
            ).text
        except (NoSuchElementException, StaleElementReferenceException):
            logger.debug('Skipping bus snippet: element is missing or stale')
            continue
        # An empty forecast is not worth reporting and must not produce a
        # dangling "<bus> - " line in the answer.
        if forecast:
            bus_arrival[bus_name] = forecast

    if not bus_arrival:
        return 'Автобусов 300 или Т19 не найдено. \n\nСмотри на карте :)'

    lines = [
        f'{bus_name} - {arrival_time}\n'
        for bus_name, arrival_time in bus_arrival.items()
    ]
    return f'{message}\n\n' + ''.join(lines)
@timed_cache(seconds=DRIVER_SESSION_TTL)
def get_driver() -> RemoteWebDriver:

View File

@ -22,7 +22,7 @@ from app.settings import (
WEBHOOK_URL,
)
queue = asyncio.Queue() # type: ignore
queue: asyncio.Queue = asyncio.Queue() # type: ignore
async def on_startup(dp: Dispatcher) -> None:

View File

@ -13,7 +13,7 @@ pytestmark = [
]
async def test_parse_site(bot: Bot) -> None:
async def test_parse_yandex_maps(bot: Bot) -> None:
tg_user = UserFactory().as_dict()
user = types.User(**tg_user)