mirror of
https://github.com/Balshgit/public.git
synced 2026-02-04 10:00:39 +03:00
add async search to celery task and logger
This commit is contained in:
@@ -1,93 +1,120 @@
|
||||
import requests
|
||||
from requests.models import Response
|
||||
from requests.auth import HTTPBasicAuth
|
||||
import re
|
||||
import time
|
||||
from functools import lru_cache
|
||||
from typing import Dict, Optional
|
||||
import sys
|
||||
from typing import Dict, Optional, Tuple
|
||||
from server.apps.main.celery_config import celery_app
|
||||
from server.settings.components.common import GIT_API_URL
|
||||
from celery_progress.backend import ProgressRecorder
|
||||
from celery import shared_task
|
||||
from server.settings.components import config
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import logging
|
||||
|
||||
# Module-wide console logger: INFO and above is written to stdout with a
# short "timestamp | message" layout.
console_logger = logging.getLogger(__name__)
console_logger.setLevel(logging.INFO)

# A more verbose layout kept for reference when debugging:
#   '%(asctime)s | %(levelname)s | process: %(process)d | module name: %(name)s | '
#   'func name: %(funcName)s | line number: %(lineno)s | message: %(message)s'
formatter = logging.Formatter(
    fmt='%(asctime)s | message: %(message)s',
    datefmt="%Y.%m.%d %H:%M:%S",
)

handler = logging.StreamHandler(sys.stdout)
handler.setFormatter(formatter)
console_logger.addHandler(handler)
|
||||
|
||||
|
||||
def current_page(response: Response, link: str) -> int:
    """Return the page number carried by one of the response's pagination links.

    :param response: response whose ``links`` mapping holds the pagination
        relations.
    :param link: name of the relation to inspect, e.g. ``'last'`` or ``'next'``.
    :return: value of the ``page`` query parameter of that link's URL.
    :raises KeyError: if the response has no such link relation.
    :raises IndexError: if the link URL has no ``page`` query parameter.
    """
    url = str(response.links[link]['url'])
    # Anchor on the parameter separator so ``per_page=100`` is never mistaken
    # for the page number, whatever order the query parameters come in.
    return int(re.findall(r'[?&]page=(\d+)', url)[-1])
|
||||
class GitHubScanner:
    """Download every page of a user's repository listing concurrently.

    Fetched pages are collected in ``self.data``, keyed by page number. Each
    value is a dict ``{'response': <response object>, 'data': <decoded JSON>}``.
    """

    def __init__(self, user: str, token: str):
        """Store the basic-auth credentials and prepare empty result storage.

        :param user: user name for HTTP basic authentication.
        :param token: password/token for HTTP basic authentication.
        """
        self.auth = aiohttp.BasicAuth(user, token)
        self.data = {}
        # Caps how many requests may be in flight at the same time.
        self.semaphore = asyncio.Semaphore(200)

    def _data_count(self) -> int:
        """Return how many items have been collected across all stored pages.

        Pages whose ``'data'`` is ``None`` (the shape used for failed
        requests) are skipped instead of raising ``TypeError`` from
        ``len(None)``.
        """
        return sum(
            len(data_set['data'])
            for data_set in self.data.values()
            if data_set['data'] is not None
        )

    @staticmethod
    def _page_count(url: str) -> int:
        """Return the number following the last ``&page=`` found in *url*.

        :raises IndexError: if *url* contains no ``&page=<digits>``.
        :raises TypeError: if *url* is not a string.
        """
        return int(re.findall(r'&page=(\d+)', url)[-1])

    async def _github_request(self, session: aiohttp.ClientSession, url: str) -> Dict:
        """Fetch one page and store it in ``self.data`` under its page number.

        :param session: open client session used to issue the GET request.
        :param url: page URL; it must contain ``&page=<n>``.
        :return: ``{'response': resp, 'data': <decoded JSON>}`` for a 200
            response, ``{'response': None, 'data': None}`` for any other status.
        :raises Exception: the last connection error, once five attempts
            have failed.
        """
        async with self.semaphore:
            attempt = 0
            while True:
                attempt += 1
                try:
                    async with session.get(url) as resp:
                        # Every non-200 status counts as a failed page. The
                        # earlier code only handled 200 and >= 400 and would
                        # spin in this loop forever on anything else.
                        if resp.status != 200:
                            return {'response': None, 'data': None}
                        page = self._page_count(url)
                        self.data[page] = {'response': resp, 'data': await resp.json()}
                        return self.data[page]
                except (aiohttp.ClientError, asyncio.TimeoutError):
                    # Only transport-level failures are worth retrying;
                    # programming errors propagate immediately.
                    if attempt >= 5:
                        raise
                    await asyncio.sleep(10)

    async def get_data(self, celery_task, username: str) -> None:
        """Fetch all repository pages of *username* into ``self.data``.

        The first page is requested on its own. If it advertises a ``last``
        pagination link, the last page is fetched to compute the total item
        count, and the pages in between are downloaded concurrently. Progress
        is reported through ``ProgressRecorder`` as each page arrives.

        :param celery_task: task object handed to ``ProgressRecorder``.
        :param username: account name inserted into the listing URL.
        """
        base_url = f'{GIT_API_URL}/{username}/repos?per_page=100&page=' + '{}'
        progress_recorder = ProgressRecorder(celery_task)
        connector = aiohttp.TCPConnector(limit=500)
        async with aiohttp.ClientSession(auth=self.auth, connector=connector) as session:
            tasks = []
            repos_count = 0
            try:
                first_page = await self._github_request(session, base_url.format(1))
                self.data[1] = first_page
                response = first_page['response']
                # No 'last' link means the listing fits on one page (or the
                # first request failed); there is nothing more to fetch.
                last_link = None if response is None else response.links.get('last')
                if last_link is not None:
                    last_page_url = str(last_link['url'])
                    last_page = self._page_count(last_page_url)
                    data_last_page = await self._github_request(session, last_page_url)
                    repos_count = (last_page - 1) * 100 + len(data_last_page['data'])
                    # Pages 1 and last_page are already stored; schedule the rest.
                    for page in range(2, last_page):
                        tasks.append(asyncio.create_task(
                            self._github_request(session, base_url.format(page))))
            except Exception as e:
                console_logger.error(e)

            # Report progress as pages actually complete, not while scheduling.
            for finished in asyncio.as_completed(tasks):
                await finished
                current_repos_count = self._data_count()
                percent = round(current_repos_count / repos_count * 100)
                progress_recorder.set_progress(current_repos_count, repos_count,
                                               description=f'Processing: {percent}%')


def github_request(url: str) -> Response:
    """Synchronously GET *url*, retrying on connection errors.

    Credentials come from the ``GITHUB_USERNAME`` / ``GITHUB_PASSWORD``
    settings; when both are empty the request is sent unauthenticated.

    :param url: address to request.
    :return: the response object returned by ``requests.get``.
    :raises ConnectionError: the last connection error, once five attempts
        have failed.
    """
    auth = HTTPBasicAuth(config('GITHUB_USERNAME'), config('GITHUB_PASSWORD'))
    anonymous = auth == HTTPBasicAuth('', '')
    attempt = 0
    while True:
        attempt += 1
        try:
            if anonymous:
                return requests.get(url)
            return requests.get(url, auth=auth)
        # requests' ConnectionError does not derive from the builtin one, so
        # both are listed; the builtin alone never caught network failures.
        except (requests.exceptions.ConnectionError, ConnectionError):
            if attempt >= 5:
                raise
            time.sleep(10)
|
||||
|
||||
|
||||
@shared_task(bind=True)
|
||||
def get_github_stars(self, username: str) -> Dict[str, Optional[int]]:
|
||||
|
||||
url = f'{GIT_API_URL}/{username}/repos?per_page=100&page=1'
|
||||
print(url)
|
||||
progress_recorder = ProgressRecorder(self)
|
||||
|
||||
response = github_request(url)
|
||||
if response.status_code >= 400:
|
||||
@celery_app.task(bind=True)
|
||||
def get_github_stars(celery_task, username: str) -> Dict[str, Optional[int]]:
|
||||
github = GitHubScanner(config('GITHUB_USERNAME'), config('GITHUB_TOKEN'))
|
||||
loop = asyncio.get_event_loop()
|
||||
loop.run_until_complete(github.get_data(celery_task, username))
|
||||
repos_data = github.data
|
||||
data = {}
|
||||
try:
|
||||
for value in repos_data.values():
|
||||
for item in value['data']:
|
||||
data[item["name"]] = item["stargazers_count"]
|
||||
result = dict(sorted(data.items(), key=lambda x: x[1], reverse=True))
|
||||
except TypeError:
|
||||
result = {}
|
||||
else:
|
||||
repos = response.json()
|
||||
|
||||
try:
|
||||
page_count = current_page(response, 'last')
|
||||
repos_count = (page_count - 1) * 100 + \
|
||||
len(github_request(response.links['last']['url']).json())
|
||||
except KeyError as e:
|
||||
page_count = 1
|
||||
repos_count = len(repos)
|
||||
|
||||
i = 0
|
||||
while 'next' in response.links.keys():
|
||||
i += 1
|
||||
response = github_request(response.links['next']['url'])
|
||||
repos.extend(response.json())
|
||||
current = i * 100 + len(response.json())
|
||||
|
||||
# Progress bar
|
||||
percent = round(100 / page_count * i)
|
||||
progress_recorder.set_progress(current, repos_count,
|
||||
description=f'Processing: {percent}%')
|
||||
|
||||
# Fetching repos and stars in dict
|
||||
data: Dict[str, int] = {}
|
||||
try:
|
||||
for item in repos:
|
||||
data[item['name']] = int(item['stargazers_count'])
|
||||
result = dict(sorted(data.items(), key=lambda x: x[1], reverse=True))
|
||||
except TypeError:
|
||||
result = {}
|
||||
|
||||
return result
|
||||
|
||||
|
||||
# Demo task
|
||||
@shared_task(bind=True)
|
||||
def process_download(self):
|
||||
def process_download(task) -> str:
|
||||
print('Task started')
|
||||
# Create the progress recorder instance
|
||||
# which we'll use to update the web page
|
||||
progress_recorder = ProgressRecorder(self)
|
||||
progress_recorder = ProgressRecorder(task)
|
||||
|
||||
print('Start')
|
||||
for i in range(5):
|
||||
|
||||
@@ -11,7 +11,8 @@
|
||||
</head>
|
||||
|
||||
<body>
|
||||
{% block demo %}{% endblock %}
|
||||
{% block demo %}
|
||||
{% endblock %}
|
||||
<!-- JQuery -->
|
||||
<script src="https://code.jquery.com/jquery-3.5.1.min.js" integrity="sha256-9/aliU8dGd2tb6OSsuzixeV4y/faTqgFtohetphbbj0=" crossorigin="anonymous"></script>
|
||||
<!-- Bootstrap JS -->
|
||||
@@ -19,7 +20,8 @@
|
||||
<!-- Celery Progress -->
|
||||
<script src="{% static 'celery_progress/celery_progress.js' %}"></script>
|
||||
|
||||
{% block progress_bar_js %}{% endblock progress_bar_js %}
|
||||
{% block progress_bar_js %}
|
||||
{% endblock progress_bar_js %}
|
||||
<div class="container text-center" style="padding-top: 20px;">
|
||||
{{ message }}
|
||||
{% for repo, stars in data.items %}
|
||||
|
||||
@@ -34,7 +34,8 @@
|
||||
<!-- Download Status -->
|
||||
<div class="container" style="padding-top: 20px;">
|
||||
<div class="card" style="height: 120px;">
|
||||
{% block progress %}{% endblock progress %}
|
||||
{% block progress %}
|
||||
{% endblock progress %}
|
||||
</div>
|
||||
</div>
|
||||
{% endblock %}
|
||||
|
||||
@@ -1,13 +1,14 @@
|
||||
from django.http import HttpRequest, HttpResponse
|
||||
from django.shortcuts import render, redirect
|
||||
from django.urls import reverse
|
||||
from django.urls import reverse, reverse_lazy
|
||||
from .forms import GithubForm
|
||||
from django.contrib.auth.decorators import login_required
|
||||
from django.contrib.auth.models import User
|
||||
from django.core.exceptions import ObjectDoesNotExist
|
||||
from .commands import get_github_stars, process_download
|
||||
from .commands import get_github_stars, console_logger, process_download
|
||||
from django.views.decorators.http import require_http_methods
|
||||
from celery.result import AsyncResult
|
||||
from functools import lru_cache
|
||||
|
||||
|
||||
task_id = {}
|
||||
@@ -29,10 +30,9 @@ def github(request: HttpRequest) -> HttpResponse:
|
||||
try:
|
||||
email = getattr((User.objects.get(username=username)),
|
||||
'email', 'default@email.ru')
|
||||
|
||||
except ObjectDoesNotExist as e:
|
||||
error = 'That user doesnt exists or not log on'
|
||||
print(error, e)
|
||||
console_logger.error(error, e)
|
||||
|
||||
if request.method == 'POST':
|
||||
|
||||
@@ -40,7 +40,7 @@ def github(request: HttpRequest) -> HttpResponse:
|
||||
result = get_github_stars.delay(github_username)
|
||||
task_id[username] = result.task_id
|
||||
|
||||
return redirect(reverse('github_result'))
|
||||
return redirect(reverse_lazy('github_result'))
|
||||
|
||||
form = GithubForm
|
||||
return render(request, 'main/github.html',
|
||||
@@ -53,18 +53,21 @@ def github_result(request: HttpRequest) -> HttpResponse:
|
||||
username = str(request.user.username)
|
||||
data = AsyncResult(task_id[username])
|
||||
|
||||
result = {}
|
||||
message = ''
|
||||
if data.ready():
|
||||
message = "Result Ready"
|
||||
result = data.get()
|
||||
print('result ready')
|
||||
console_logger.info('result ready')
|
||||
else:
|
||||
print('result not ready')
|
||||
console_logger.info('result not ready')
|
||||
|
||||
return render(request, 'main/github_result.html',
|
||||
context={'data': result,
|
||||
'message': message})
|
||||
|
||||
|
||||
@lru_cache(maxsize=10)
|
||||
def demo_view(request: HttpRequest) -> HttpResponse:
|
||||
username = str(request.user.username)
|
||||
form = GithubForm
|
||||
@@ -79,11 +82,11 @@ def demo_view(request: HttpRequest) -> HttpResponse:
|
||||
message = f'Total repos: {len(result)}\n'
|
||||
if len(result) == 0:
|
||||
result = {'Error': 'User has no repositories!'}
|
||||
print('Result ready! Please refresh page')
|
||||
console_logger.info('Result ready! Please refresh page')
|
||||
else:
|
||||
print('result not ready')
|
||||
console_logger.info('result not ready')
|
||||
except KeyError as e:
|
||||
print(e)
|
||||
console_logger.error(e)
|
||||
finally:
|
||||
# Return demo view
|
||||
return render(request, 'progress.html',
|
||||
@@ -99,7 +102,7 @@ def demo_view(request: HttpRequest) -> HttpResponse:
|
||||
# Get ID
|
||||
task_id[username] = result.task_id
|
||||
# Print Task ID
|
||||
print(f'Celery Task ID: {task_id[username]}')
|
||||
console_logger.info(f'Celery Task ID: {task_id[username]}')
|
||||
# Return demo view with Task ID
|
||||
return render(request, 'progress.html',
|
||||
context={'task_id': task_id[username],
|
||||
|
||||
Reference in New Issue
Block a user