-
Notifications
You must be signed in to change notification settings - Fork 846
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Mark 404'd Repos to be Ignored and Re-collect Errored Repos #2678
Changes from all commits
08666c9
a8bf6f7
1934a2d
01c5c73
801e5e0
9a76471
5a77d65
23f17e9
b70f524
68e50de
9aa9725
45be12f
4c8edae
be5c19c
0a52999
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,16 +20,7 @@ | |
from augur.application.db.engine import get_database_string | ||
from augur.tasks.init import get_redis_conn_values, get_rabbitmq_conn_string | ||
from augur.application.db.models import CollectionStatus, Repo | ||
|
||
class CollectionState(Enum): | ||
SUCCESS = "Success" | ||
PENDING = "Pending" | ||
ERROR = "Error" | ||
COLLECTING = "Collecting" | ||
INITIALIZING = "Initializing" | ||
UPDATE = "Update" | ||
FAILED_CLONE = "Failed Clone" | ||
|
||
from augur.tasks.util.collection_state import CollectionState | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
@@ -85,7 +76,7 @@ class CollectionState(Enum): | |
#Classes for tasks that take a repo_git as an argument. | ||
class AugurCoreRepoCollectionTask(celery.Task): | ||
IsaacMilarky marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
def augur_handle_task_failure(self,exc,task_id,repo_git,logger_name,collection_hook='core'): | ||
def augur_handle_task_failure(self,exc,task_id,repo_git,logger_name,collection_hook='core',after_fail=CollectionState.ERROR.value): | ||
IsaacMilarky marked this conversation as resolved.
Show resolved
Hide resolved
|
||
from augur.tasks.init.celery_app import engine | ||
IsaacMilarky marked this conversation as resolved.
Show resolved
Hide resolved
IsaacMilarky marked this conversation as resolved.
Show resolved
Hide resolved
IsaacMilarky marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
logger = AugurLogger(logger_name).get_logger() | ||
IsaacMilarky marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
@@ -104,7 +95,7 @@ def augur_handle_task_failure(self,exc,task_id,repo_git,logger_name,collection_h | |
prevStatus = getattr(repoStatus, f"{collection_hook}_status") | ||
|
||
if prevStatus == CollectionState.COLLECTING.value or prevStatus == CollectionState.INITIALIZING.value: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [pylint] reported by reviewdog 🐶 |
||
setattr(repoStatus, f"{collection_hook}_status", CollectionState.ERROR.value) | ||
setattr(repoStatus, f"{collection_hook}_status", after_fail) | ||
setattr(repoStatus, f"{collection_hook}_task_id", None) | ||
session.commit() | ||
|
||
|
@@ -129,6 +120,7 @@ def on_failure(self,exc,task_id,args,kwargs,einfo): | |
repo_git = args[0] | ||
self.augur_handle_task_failure(exc,task_id,repo_git, "ml_task_failure", collection_hook='ml') | ||
|
||
|
||
#task_cls='augur.tasks.init.celery_app:AugurCoreRepoCollectionTask' | ||
celery_app = Celery('tasks', broker=BROKER_URL, backend=BACKEND_URL, include=tasks) | ||
|
||
|
@@ -209,7 +201,7 @@ def setup_periodic_tasks(sender, **kwargs): | |
""" | ||
from celery.schedules import crontab | ||
IsaacMilarky marked this conversation as resolved.
Show resolved
Hide resolved
|
||
from augur.tasks.start_tasks import augur_collection_monitor, augur_collection_update_weights | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [pylint] reported by reviewdog 🐶 |
||
from augur.tasks.start_tasks import non_repo_domain_tasks | ||
from augur.tasks.start_tasks import non_repo_domain_tasks, retry_errored_repos | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [pylint] reported by reviewdog 🐶 |
||
from augur.tasks.git.facade_tasks import clone_repos | ||
IsaacMilarky marked this conversation as resolved.
Show resolved
Hide resolved
|
||
from augur.tasks.db.refresh_materialized_views import refresh_materialized_views | ||
IsaacMilarky marked this conversation as resolved.
Show resolved
Hide resolved
|
||
from augur.tasks.data_analysis.contributor_breadth_worker.contributor_breadth_worker import contributor_breadth_model | ||
IsaacMilarky marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
@@ -234,6 +226,9 @@ def setup_periodic_tasks(sender, **kwargs): | |
logger.info(f"Scheduling update of collection weights on midnight each day") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [pylint] reported by reviewdog 🐶 |
||
sender.add_periodic_task(crontab(hour=0, minute=0),augur_collection_update_weights.s()) | ||
|
||
logger.info(f"Setting 404 repos to be marked for retry on midnight each day") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [pylint] reported by reviewdog 🐶 |
||
sender.add_periodic_task(crontab(hour=0, minute=0),retry_errored_repos.s()) | ||
|
||
logger.info(f"Scheduling contributor breadth every 30 days") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [pylint] reported by reviewdog 🐶 |
||
thirty_days_in_seconds = 30*24*60*60 | ||
sender.add_periodic_task(thirty_days_in_seconds, contributor_breadth_model.s()) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -33,9 +33,9 @@ | |
from augur.tasks.init.celery_app import celery_app as celery | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [pylint] reported by reviewdog 🐶 |
||
from augur.application.db.session import DatabaseSession | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [pylint] reported by reviewdog 🐶 |
||
from logging import Logger | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [pylint] reported by reviewdog 🐶 |
||
from enum import Enum | ||
from augur.tasks.util.redis_list import RedisList | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [pylint] reported by reviewdog 🐶 |
||
from augur.application.db.models import CollectionStatus, Repo | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [pylint] reported by reviewdog 🐶 |
||
from augur.tasks.util.collection_state import CollectionState | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [pylint] reported by reviewdog 🐶 |
||
from augur.tasks.util.collection_util import * | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [pylint] reported by reviewdog 🐶 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [pylint] reported by reviewdog 🐶 |
||
from augur.tasks.git.util.facade_worker.facade_worker.utilitymethods import get_facade_weight_time_factor | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [pylint] reported by reviewdog 🐶 |
||
|
||
|
@@ -328,9 +328,41 @@ def augur_collection_update_weights(): | |
session.commit() | ||
#git_update_commit_count_weight(repo_git) | ||
|
||
@celery.task
def retry_errored_repos():
    """
    Periodic task to reset repositories that have errored and try again.

    For each collection hook (secondary, core, facade, ml), every row whose
    ``<hook>_status`` equals ``CollectionState.ERROR`` is set back to
    ``CollectionState.PENDING``.
    """
    from augur.tasks.init.celery_app import engine

    # Log under this task's own name rather than a sibling task's.
    logger = logging.getLogger(retry_errored_repos.__name__)

    # Both values are fixed enum constants (never user input), so they are
    # safe to interpolate; they must be single-quoted to be SQL string
    # literals rather than identifiers.
    pending = CollectionState.PENDING.value
    errored = CollectionState.ERROR.value

    #TODO: Isaac needs to normalize the statuses to be abstract in the
    #collection_status table once augur dev is less unstable.
    # NOTE(review): the original draft issued these updates against `repo`;
    # the per-hook status columns are assumed to live on `collection_status`
    # (what the TODO above and the CollectionStatus model import point to).
    # Confirm against the schema.
    hooks = ("secondary", "core", "facade", "ml")
    statements = [
        f"UPDATE collection_status SET {hook}_status = '{pending}'"
        f" WHERE {hook}_status = '{errored}' ;"
        for hook in hooks
    ]

    with DatabaseSession(logger, engine) as session:
        query = s.sql.text("\n".join(statements))
        session.execute_sql(query)
|
||
|
||
|
||
#Retry this task for every issue so that repos that were added manually get the chance to be added to the collection_status table. | ||
@celery.task(autoretry_for=(Exception,), retry_backoff=True, retry_backoff_max=300, retry_jitter=True, max_retries=None) | ||
def create_collection_status_records(): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [pylint] reported by reviewdog 🐶 |
||
""" | ||
Automatic task that runs and checks for repos that haven't been given a collection_status | ||
record corresponding to the state of their collection at the moment. | ||
|
||
A special celery task that automatically retries itself and has no max retries. | ||
""" | ||
|
||
from augur.tasks.init.celery_app import engine | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [pylint] reported by reviewdog 🐶 |
||
logger = logging.getLogger(create_collection_status_records.__name__) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [pylint] reported by reviewdog 🐶 |
||
from enum import Enum | ||
|
||
class CollectionState(Enum):
    """
    Enum of possible states a repository's collection
    can have, whether it is core, secondary, facade, etc.

    Attributes:

        SUCCESS: State of success for the jobs in that collection hook
        PENDING: Means the repo has not had collection run at all
        ERROR: The collection hook has crashed
        COLLECTING: The collection hook is running
        INITIALIZING: Only for facade, indicates the repo is being cloned via git
        UPDATE: Only for facade, indicates the repo has been cloned
        FAILED_CLONE: Only for facade, indicates the clone has failed (usually 404)
        STANDBY: Indicates the repo has been paused
        IGNORE: Repo has encountered an error and we will not try again (usually 404)
    """

    # The string values are what gets compared against and written to the
    # per-hook status fields, so they must stay exactly as spelled here.
    SUCCESS = "Success"
    PENDING = "Pending"
    ERROR = "Error"
    COLLECTING = "Collecting"
    INITIALIZING = "Initializing"
    UPDATE = "Update"
    FAILED_CLONE = "Failed Clone"
    STANDBY = "Standby"
    IGNORE = "Ignore"
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,18 +24,9 @@ | |
from augur.tasks.github.util.github_task_session import GithubTaskManifest | ||
from augur.application.db.session import DatabaseSession | ||
from augur.tasks.util.worker_util import calculate_date_weight_from_timestamps | ||
from augur.tasks.util.collection_state import CollectionState | ||
|
||
|
||
# class syntax | ||
class CollectionState(Enum): | ||
SUCCESS = "Success" | ||
PENDING = "Pending" | ||
ERROR = "Error" | ||
COLLECTING = "Collecting" | ||
INITIALIZING = "Initializing" | ||
UPDATE = "Update" | ||
FAILED_CLONE = "Failed Clone" | ||
|
||
def get_list_of_all_users(session): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [pylint] reported by reviewdog 🐶 |
||
#Get a list of all users. | ||
query = s.sql.text(""" | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[pylint] reported by reviewdog 🐶
W0612: Unused variable 'e' (unused-variable)