Skip to content

Commit

Permalink
Remove retry circular dependency (#1969)
Browse files Browse the repository at this point in the history
Removing usage of `@retry.wrap` in logs methods.

This removal is necessary because retries also use logs inside of their
implementation so we want to avoid the following cyclic scenario:

1. Log method is called
2. A log fails (e.g. permission issue)
3. The logging method uses a retry wrapper, so it tries again
4. The code inside the retry wrapper also calls a log method, so we go
back to step 1

Before this PR, this scenario was avoided by using a `log_retries`
parameter, that would disable loggings on retries functions when the
retry was called from a log function.

This PR remove this parameter and get rid of the circular dependency by
making a custom retry logic inside of the logs methods.

---------

Co-authored-by: Gustavo Chaves Galdino de Moraes <[email protected]>
Co-authored-by: jonathanmetzman <[email protected]>
  • Loading branch information
3 people authored Apr 22, 2024
1 parent e83b9be commit 162ca0c
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 44 deletions.
78 changes: 47 additions & 31 deletions common/logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,11 @@
# limitations under the License.
"""Set up for logging."""
from enum import Enum

import logging
import os
import sys
import time
import traceback

import google.cloud.logging
Expand All @@ -25,7 +27,6 @@
# Disable this check since we have a bunch of non-constant globals in this file.
# pylint: disable=invalid-name

from common import retry
from common import utils

_default_logger = None
Expand All @@ -35,8 +36,9 @@

LOG_LENGTH_LIMIT = 250 * 1000

NUM_RETRIES = 5
NUM_ATTEMPTS = 4
RETRY_DELAY = 1
BACKOFF = 2


def _initialize_cloud_clients():
Expand Down Expand Up @@ -153,47 +155,61 @@ class LogSeverity(Enum):
DEBUG = logging.DEBUG


@retry.wrap(NUM_RETRIES, RETRY_DELAY, 'common.logs.log', log_retries=False)
def log(logger, severity, message, *args, extras=None):
"""Log a message with severity |severity|. If using stack driver logging
then |extras| is also logged (in addition to default extras)."""
message = str(message)
if args:
message = message % args

if utils.is_local():
if extras:
message += ' Extras: ' + str(extras)
logging.log(severity, message)
return

if logger is None:
logger = _default_logger
assert logger

struct_message = {
'message': message,
}
all_extras = _default_extras.copy()
extras = extras or {}
all_extras.update(extras)
struct_message.update(all_extras)
severity = LogSeverity(severity).name
logger.log_struct(struct_message, severity=severity)
# Custom retry logic to avoid circular dependency as retry from
# retry.py uses log.
for num_try in range(1, NUM_ATTEMPTS + 1):
try:
message = str(message)
if args:
message = message % args

if utils.is_local():
if extras:
message += ' Extras: ' + str(extras)
logging.log(severity, message)
return

if logger is None:
logger = _default_logger
assert logger

struct_message = {
'message': message,
}
all_extras = _default_extras.copy()
extras = extras or {}
all_extras.update(extras)
struct_message.update(all_extras)
severity = LogSeverity(severity).name
logger.log_struct(struct_message, severity=severity)
break
except Exception: # pylint: disable=broad-except
# We really dont want do to do anything here except sleep here,
# since we cant log it out as log itself is already failing
time.sleep(utils.get_retry_delay(num_try, RETRY_DELAY, BACKOFF))


def error(message, *args, extras=None, logger=None):
"""Logs |message| to stackdriver logging and error reporting (including
exception if there was one."""

@retry.wrap(NUM_RETRIES,
RETRY_DELAY,
'common.logs.error._report_error_with_retries',
log_retries=False)
def _report_error_with_retries(message):
if utils.is_local():
return
_error_reporting_client.report(message)

# Custom retry logic to avoid circular dependency
# as retry from retry.py uses log
for num_try in range(1, NUM_ATTEMPTS + 1):
try:
_error_reporting_client.report(message)
break
except Exception: # pylint: disable=broad-except
# We really dont want do to do anything here except sleep here,
# since we cant log it out as log itself is already failing
time.sleep(utils.get_retry_delay(num_try, RETRY_DELAY, BACKOFF))

if not any(sys.exc_info()):
_report_error_with_retries(message % args)
Expand Down
20 changes: 7 additions & 13 deletions common/retry.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,28 +20,23 @@
import sys
import time

from common import logs
from common import utils


def sleep(seconds):
"""Invoke time.sleep."""
time.sleep(seconds)


def get_delay(num_try, delay, backoff):
"""Compute backoff delay."""
return delay * (backoff**(num_try - 1))


def wrap( # pylint: disable=too-many-arguments
retries,
delay,
function,
backoff=2,
exception_type=Exception,
log_retries=True,
retry_on_false=False):
"""Retry decorator for a function."""
# To avoid circular dependency.
from common import logs # pylint:disable=import-outside-toplevel

assert delay > 0
assert backoff >= 1
Expand All @@ -60,11 +55,10 @@ def handle_retry(num_try, exception=None):

if (exception is None or
isinstance(exception, exception_type)) and num_try < tries:
if log_retries:
logs.info('Retrying on %s failed with %s. Retrying again.',
function_with_type,
sys.exc_info()[1])
sleep(get_delay(num_try, delay, backoff))
logs.info('Retrying on %s failed with %s. Retrying again.',
function_with_type,
sys.exc_info()[1])
sleep(utils.get_retry_delay(num_try, delay, backoff))
return True

logs.error('Retrying on %s failed with %s. Raise.',
Expand Down
31 changes: 31 additions & 0 deletions common/test_common_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for utils.py"""

from common import utils


def test_get_retry_delay():
""""Tests if get delay is working as expected"""
delay = 3
backoff = 2

first_try = 1
first_try_delay = utils.get_retry_delay(first_try, delay, backoff)
# Backoff should have no effect on first try
assert first_try_delay == delay

second_try = 2
second_try_delay = utils.get_retry_delay(second_try, delay, backoff)
assert second_try_delay == delay * backoff
7 changes: 7 additions & 0 deletions common/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,3 +73,10 @@ def file_hash(file_path):
chunk = file_handle.read(chunk_size)

return digest.hexdigest()


# Calculate a retry delay based on the current try,
# a default delay, and an exponential backoff
def get_retry_delay(num_try, delay, backoff):
"""Compute backoff delay."""
return delay * (backoff**(num_try - 1))

0 comments on commit 162ca0c

Please sign in to comment.