add async search to celery task and logger

This commit is contained in:
2021-10-07 02:06:51 +03:00
parent d3d862eaef
commit 9f8c186128
94 changed files with 526 additions and 7035 deletions

View File

@@ -1,93 +1,120 @@
import requests
from requests.models import Response
from requests.auth import HTTPBasicAuth
import re
import time
from functools import lru_cache
from typing import Dict, Optional
import sys
from typing import Dict, Optional, Tuple
from server.apps.main.celery_config import celery_app
from server.settings.components.common import GIT_API_URL
from celery_progress.backend import ProgressRecorder
from celery import shared_task
from server.settings.components import config
import aiohttp
import asyncio
import logging
# Module-level logger shared by this module and imported by the views.
# Records at INFO and above are written to standard output.
console_logger = logging.getLogger(__name__)
console_logger.setLevel(logging.INFO)

# Compact record layout: timestamp followed by the message text.
# A more verbose layout that was left disabled in the original:
# fmt='%(asctime)s | %(levelname)s | process: %(process)d | module name: %(name)s | '
# 'func name: %(funcName)s | line number: %(lineno)s | message: %(message)s',
formatter = logging.Formatter(
    fmt='%(asctime)s | message: %(message)s',
    datefmt="%Y.%m.%d %H:%M:%S",
)

# Single stream handler bound to stdout, using the formatter above.
handler = logging.StreamHandler(stream=sys.stdout)
handler.setFormatter(formatter)
console_logger.addHandler(handler)
def current_page(response: "Response", link: str) -> int:
    """Return the page number carried by one of the response's pagination links.

    The URL stored under ``response.links[link]['url']`` is inspected and the
    value of its ``page`` query parameter is returned.

    Args:
        response: object exposing a ``links`` mapping of relation name to a
            mapping that contains a ``'url'`` entry.
        link: relation name to look up (for example ``'last'`` or ``'next'``).

    Returns:
        The integer value of the ``page`` query parameter.

    Raises:
        KeyError: ``response.links`` has no entry for ``link``.
        IndexError: the URL carries no numeric ``page`` parameter.
    """
    url = str(response.links[link]['url'])
    # Anchor on '?' or '&' so that 'per_page=100' is never taken for the page
    # number, whatever the order of the query parameters.
    match = re.search(r'[?&]page=(\d+)', url)
    if match is None:
        raise IndexError(f'no page parameter in link URL: {url}')
    return int(match.group(1))
class GitHubScanner:
    """Collects GitHub API responses for one set of credentials."""

    def __init__(self, user: str, token: str):
        """Prepare the shared state used by the request coroutines.

        Args:
            user: account name passed to ``aiohttp.BasicAuth``.
            token: secret passed to ``aiohttp.BasicAuth``.
        """
        # Caps how many coroutines may hold the semaphore at once.
        self.semaphore = asyncio.Semaphore(200)
        # Fetched results; starts empty.
        self.data = {}
        # Basic-auth credentials object built from the two arguments.
        self.auth = aiohttp.BasicAuth(user, token)
# NOTE(review): this span comes from a rendered diff whose indentation was
# stripped. Lines of the synchronous helper `github_request` and of the
# `_data_count` method appear interleaved below -- confirm against the real file.
# `github_request` sends a GET for `url` and returns the response, building
# basic-auth credentials from the GITHUB_USERNAME / GITHUB_PASSWORD settings.
def github_request(url: str) -> Response:
auth = HTTPBasicAuth(config('GITHUB_USERNAME'), config('GITHUB_PASSWORD'))
counter = 0
while True:
# `_data_count` adds up len(entry['data']) over every entry of self.data.
def _data_count(self) -> int:
repos_count = 0
try:
counter += 1
# Empty credentials: send the request without authentication.
if auth == HTTPBasicAuth('', ''):
response = requests.get(url)
else:
response = requests.get(url, auth=auth)
return response
# NOTE(review): no import of ConnectionError is visible here, so this looks
# like the builtin ConnectionError rather than requests' own exception class --
# verify that connection failures from `requests` are actually caught.
except ConnectionError as connection_error:
# Sleep 10 s and loop again while fewer than 5 attempts were made; otherwise
# re-raise the error.
if counter < 5:
time.sleep(10)
else:
raise connection_error
for data_set in self.data.values():
repos_count += len(data_set['data'])
# NOTE(review): len() of a None 'data' entry would raise TypeError, which this
# handler does not catch -- confirm which failure ValueError is meant to cover.
except ValueError:
console_logger.info(f'Data is empty')
return repos_count
@staticmethod
def _page_count(url: str) -> int:
page = int(str(re.findall(pattern=r'&page=\d+', string=url)[-1]).replace('&page=', ''))
return page
async def _github_request(self, session: aiohttp.ClientSession, url: str) -> Dict:
async with self.semaphore:
counter = 0
while True:
try:
counter += 1
resp = await session.get(url)
async with resp:
if resp.status == 200:
self.data[self._page_count(url)] = {'response': resp, 'data': await resp.json()}
return self.data[self._page_count(url)]
if resp.status >= 400:
return {'response': None, 'data': None}
except Exception as connection_error:
if counter < 5:
await asyncio.sleep(10)
else:
raise connection_error
async def get_data(self, celery_task, username: str) -> None:
    """Fetch every page of ``username``'s repository list into ``self.data``.

    Page 1 is requested first. If its response exposes a ``last`` pagination
    link, the last page is requested next so the total number of repositories
    can be computed as ``(last_page - 1) * 100 + len(last page)``. The pages
    in between are then fetched concurrently, and progress is reported through
    ``ProgressRecorder`` each time one of them finishes. A response without a
    ``last`` link is treated as a single-page result and nothing more is
    requested.

    Failures while fetching the first or last page are logged and end the
    call without raising. A failure in one of the concurrent page requests
    cancels the remaining ones and propagates to the caller.

    Args:
        celery_task: task object handed to ``ProgressRecorder``.
        username: path segment inserted after ``GIT_API_URL``.
    """
    base_url = f'{GIT_API_URL}/{username}/repos?per_page=100&page=' + '{}'
    progress_recorder = ProgressRecorder(celery_task)
    connector = aiohttp.TCPConnector(limit=500)
    async with aiohttp.ClientSession(auth=self.auth, connector=connector) as session:
        try:
            # _github_request stores a successful page in self.data itself.
            first = await self._github_request(session, base_url.format(1))
            first_response = first['response']
            if first_response is None:
                console_logger.error(
                    f'Failed to fetch the first page of repositories for {username}')
                return
            last_link = first_response.links.get('last')
            if last_link is None:
                # No "last" relation: everything fit on the first page.
                return
            last_page_url = str(last_link['url'])
            last_page = self._page_count(last_page_url)
            last = await self._github_request(session, last_page_url)
            # Every page before the last one is full (100 entries).
            repos_count = (last_page - 1) * 100 + len(last['data'] or [])
        except Exception as e:
            console_logger.error(e)
            return

        def report_progress() -> None:
            # Publish how many repositories have been stored so far.
            done = self._data_count()
            percent = round(done / repos_count * 100) if repos_count else 100
            progress_recorder.set_progress(done, repos_count,
                                           description=f'Processing: {percent}%')

        # Pages 1 and last_page are already stored; fetch only those between.
        pending = [
            asyncio.create_task(self._github_request(session, base_url.format(page)))
            for page in range(2, last_page)
        ]
        try:
            report_progress()
            for finished in asyncio.as_completed(pending):
                await finished
                report_progress()
        finally:
            # No-op for finished tasks; stops the rest if one of them failed.
            for task in pending:
                task.cancel()
# NOTE(review): this span comes from a rendered diff whose indentation was
# stripped. Two versions of `get_github_stars` appear interleaved: one built on
# `github_request` / `response.links`, one built on `GitHubScanner` -- confirm
# against the real file which lines are current.
@shared_task(bind=True)
def get_github_stars(self, username: str) -> Dict[str, Optional[int]]:
url = f'{GIT_API_URL}/{username}/repos?per_page=100&page=1'
print(url)
progress_recorder = ProgressRecorder(self)
response = github_request(url)
if response.status_code >= 400:
# GitHubScanner-based version: runs GitHubScanner.get_data on the event loop,
# then maps each repository "name" to its "stargazers_count".
@celery_app.task(bind=True)
def get_github_stars(celery_task, username: str) -> Dict[str, Optional[int]]:
github = GitHubScanner(config('GITHUB_USERNAME'), config('GITHUB_TOKEN'))
# NOTE(review): asyncio.get_event_loop() outside a running loop is deprecated
# on newer Python versions -- confirm the targeted interpreter.
loop = asyncio.get_event_loop()
loop.run_until_complete(github.get_data(celery_task, username))
repos_data = github.data
data = {}
try:
for value in repos_data.values():
for item in value['data']:
data[item["name"]] = item["stargazers_count"]
# Order repositories by star count, highest first.
result = dict(sorted(data.items(), key=lambda x: x[1], reverse=True))
# A TypeError while collecting (e.g. iterating a None 'data' entry) yields an
# empty result instead of an error.
except TypeError:
result = {}
else:
repos = response.json()
try:
# Total is estimated as full pages of 100 plus the length of the last page.
page_count = current_page(response, 'last')
repos_count = (page_count - 1) * 100 + \
len(github_request(response.links['last']['url']).json())
# No 'last' link: treat the first response as the only page.
except KeyError as e:
page_count = 1
repos_count = len(repos)
i = 0
# Follow 'next' links until the response no longer provides one.
while 'next' in response.links.keys():
i += 1
response = github_request(response.links['next']['url'])
repos.extend(response.json())
current = i * 100 + len(response.json())
# Progress bar
percent = round(100 / page_count * i)
progress_recorder.set_progress(current, repos_count,
description=f'Processing: {percent}%')
# Fetching repos and stars in dict
data: Dict[str, int] = {}
try:
for item in repos:
data[item['name']] = int(item['stargazers_count'])
result = dict(sorted(data.items(), key=lambda x: x[1], reverse=True))
except TypeError:
result = {}
return result
# Demo task
@shared_task(bind=True)
def process_download(self):
def process_download(task) -> str:
print('Task started')
# Create the progress recorder instance
# which we'll use to update the web page
progress_recorder = ProgressRecorder(self)
progress_recorder = ProgressRecorder(task)
print('Start')
for i in range(5):

View File

@@ -11,7 +11,8 @@
</head>
<body>
{% block demo %}{% endblock %}
{% block demo %}
{% endblock %}
<!-- JQuery -->
<script src="https://code.jquery.com/jquery-3.5.1.min.js" integrity="sha256-9/aliU8dGd2tb6OSsuzixeV4y/faTqgFtohetphbbj0=" crossorigin="anonymous"></script>
<!-- Bootstrap JS -->
@@ -19,7 +20,8 @@
<!-- Celery Progress -->
<script src="{% static 'celery_progress/celery_progress.js' %}"></script>
{% block progress_bar_js %}{% endblock progress_bar_js %}
{% block progress_bar_js %}
{% endblock progress_bar_js %}
<div class="container text-center" style="padding-top: 20px;">
{{ message }}
{% for repo, stars in data.items %}

View File

@@ -34,7 +34,8 @@
<!-- Download Status -->
<div class="container" style="padding-top: 20px;">
<div class="card" style="height: 120px;">
{% block progress %}{% endblock progress %}
{% block progress %}
{% endblock progress %}
</div>
</div>
{% endblock %}

View File

@@ -1,13 +1,14 @@
from django.http import HttpRequest, HttpResponse
from django.shortcuts import render, redirect
from django.urls import reverse
from django.urls import reverse, reverse_lazy
from .forms import GithubForm
from django.contrib.auth.decorators import login_required
from django.contrib.auth.models import User
from django.core.exceptions import ObjectDoesNotExist
from .commands import get_github_stars, process_download
from .commands import get_github_stars, console_logger, process_download
from django.views.decorators.http import require_http_methods
from celery.result import AsyncResult
from functools import lru_cache
task_id = {}
@@ -29,10 +30,9 @@ def github(request: HttpRequest) -> HttpResponse:
try:
email = getattr((User.objects.get(username=username)),
'email', 'default@email.ru')
except ObjectDoesNotExist as e:
error = 'That user doesnt exists or not log on'
print(error, e)
console_logger.error(error, e)
if request.method == 'POST':
@@ -40,7 +40,7 @@ def github(request: HttpRequest) -> HttpResponse:
result = get_github_stars.delay(github_username)
task_id[username] = result.task_id
return redirect(reverse('github_result'))
return redirect(reverse_lazy('github_result'))
form = GithubForm
return render(request, 'main/github.html',
@@ -53,18 +53,21 @@ def github_result(request: HttpRequest) -> HttpResponse:
username = str(request.user.username)
data = AsyncResult(task_id[username])
result = {}
message = ''
if data.ready():
message = "Result Ready"
result = data.get()
print('result ready')
console_logger.info('result ready')
else:
print('result not ready')
console_logger.info('result not ready')
return render(request, 'main/github_result.html',
context={'data': result,
'message': message})
@lru_cache(maxsize=10)
def demo_view(request: HttpRequest) -> HttpResponse:
username = str(request.user.username)
form = GithubForm
@@ -79,11 +82,11 @@ def demo_view(request: HttpRequest) -> HttpResponse:
message = f'Total repos: {len(result)}\n'
if len(result) == 0:
result = {'Error': 'User has no repositories!'}
print('Result ready! Please refresh page')
console_logger.info('Result ready! Please refresh page')
else:
print('result not ready')
console_logger.info('result not ready')
except KeyError as e:
print(e)
console_logger.error(e)
finally:
# Return demo view
return render(request, 'progress.html',
@@ -99,7 +102,7 @@ def demo_view(request: HttpRequest) -> HttpResponse:
# Get ID
task_id[username] = result.task_id
# Print Task ID
print(f'Celery Task ID: {task_id[username]}')
console_logger.info(f'Celery Task ID: {task_id[username]}')
# Return demo view with Task ID
return render(request, 'progress.html',
context={'task_id': task_id[username],